Fix system tests for dataflow, vision and vertex_ai

VladaZakharova committed Mar 20, 2024
1 parent 29ac05f commit 45a5e21
Showing 6 changed files with 13 additions and 205 deletions.
@@ -24,27 +24,24 @@

import os
from datetime import datetime
-from pathlib import Path

from airflow.models.dag import DAG
from airflow.providers.apache.beam.hooks.beam import BeamRunnerType
from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator
from airflow.providers.google.cloud.operators.dataflow import DataflowStopJobOperator
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
-from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataflow_native_python"

+RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

-PYTHON_FILE_NAME = "wordcount_debugging.py"
GCS_TMP = f"gs://{BUCKET_NAME}/temp/"
GCS_STAGING = f"gs://{BUCKET_NAME}/staging/"
GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
-GCS_PYTHON_SCRIPT = f"gs://{BUCKET_NAME}/{PYTHON_FILE_NAME}"
-PYTHON_FILE_LOCAL_PATH = str(Path(__file__).parent / "resources" / PYTHON_FILE_NAME)
+GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
LOCATION = "europe-west3"

default_args = {
@@ -64,13 +61,6 @@
) as dag:
create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)

-upload_file = LocalFilesystemToGCSOperator(
-task_id="upload_file_to_bucket",
-src=PYTHON_FILE_LOCAL_PATH,
-dst=PYTHON_FILE_NAME,
-bucket=BUCKET_NAME,
-)

# [START howto_operator_start_python_job]
start_python_job = BeamRunPythonPipelineOperator(
runner=BeamRunnerType.DataflowRunner,
@@ -80,10 +70,10 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={"location": LOCATION},
dataflow_config={"location": LOCATION, "job_name": "start_python_job"},
)
# [END howto_operator_start_python_job]

@@ -94,7 +84,7 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
)
@@ -114,7 +104,6 @@
(
# TEST SETUP
create_bucket
->> upload_file
# TEST BODY
>> start_python_job
>> start_python_job_local
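Net effect of this first file's changes: the DAG no longer keeps a local copy of the wordcount script and uploads it at setup time; BeamRunPythonPipelineOperator reads the script straight from the shared airflow-system-tests-resources bucket (py_file accepts a gs:// URI), the pinned Beam version moves from 2.46.0 to 2.47.0, and the Dataflow job gets an explicit job_name. A minimal sketch of the resulting operator wiring, assuming the unchanged context lines (task_id sits in a collapsed hunk, so its value here is an assumption):

    import os

    from airflow.providers.apache.beam.hooks.beam import BeamRunnerType
    from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator

    ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
    BUCKET_NAME = f"bucket_dataflow_native_python_{ENV_ID}"
    RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
    GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
    GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
    LOCATION = "europe-west3"

    start_python_job = BeamRunPythonPipelineOperator(
        task_id="start_python_job",  # assumed; hidden in the collapsed hunk
        runner=BeamRunnerType.DataflowRunner,
        py_file=GCS_PYTHON_SCRIPT,  # gs:// path, so no upload task is needed
        pipeline_options={"output": GCS_OUTPUT},
        py_requirements=["apache-beam[gcp]==2.47.0"],
        py_interpreter="python3",
        py_system_site_packages=False,
        dataflow_config={"location": LOCATION, "job_name": "start_python_job"},
    )

The async variant in the next file receives the same treatment.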
@@ -24,7 +24,6 @@

import os
from datetime import datetime
-from pathlib import Path
from typing import Callable

from airflow.exceptions import AirflowException
@@ -39,20 +38,18 @@
DataflowJobMetricsSensor,
DataflowJobStatusSensor,
)
-from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataflow_native_python_async"

+RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

-PYTHON_FILE_NAME = "wordcount_debugging.txt"
GCS_TMP = f"gs://{BUCKET_NAME}/temp/"
GCS_STAGING = f"gs://{BUCKET_NAME}/staging/"
GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
-GCS_PYTHON_SCRIPT = f"gs://{BUCKET_NAME}/{PYTHON_FILE_NAME}"
-PYTHON_FILE_LOCAL_PATH = str(Path(__file__).parent / "resources" / PYTHON_FILE_NAME)
+GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
LOCATION = "europe-west3"

default_args = {
@@ -72,13 +69,6 @@
) as dag:
create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)

-upload_file = LocalFilesystemToGCSOperator(
-task_id="upload_file_to_bucket",
-src=PYTHON_FILE_LOCAL_PATH,
-dst=PYTHON_FILE_NAME,
-bucket=BUCKET_NAME,
-)

# [START howto_operator_start_python_job_async]
start_python_job_async = BeamRunPythonPipelineOperator(
task_id="start_python_job_async",
@@ -88,7 +78,7 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={
@@ -174,7 +164,6 @@ def check_autoscaling_event(autoscaling_events: list[dict]) -> bool:
(
# TEST SETUP
create_bucket
->> upload_file
# TEST BODY
>> start_python_job_async
>> [
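Beyond the shared-bucket migration, the async test's unchanged context is worth a note: it watches the submitted job with Dataflow sensors (the imports for DataflowJobStatusSensor and DataflowJobMetricsSensor survive the diff, as does the check_autoscaling_event callback). A hedged sketch of how such a sensor is typically wired to the pipeline task, assuming the documented XCom layout of the Beam operator:

    from airflow.providers.google.cloud.hooks.dataflow import DataflowJobStatus
    from airflow.providers.google.cloud.sensors.dataflow import DataflowJobStatusSensor

    # Assumption: the Beam operator pushes the Dataflow job id to XCom under
    # "dataflow_job_id"; the sensor then waits for the job to reach DONE.
    wait_for_python_job_async_done = DataflowJobStatusSensor(
        task_id="wait_for_python_job_async_done",
        job_id="{{ task_instance.xcom_pull('start_python_job_async')['dataflow_job_id'] }}",
        expected_statuses={DataflowJobStatus.JOB_STATE_DONE},
        location="europe-west3",
    )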
@@ -82,10 +82,10 @@
"output_topic": f"projects/{PROJECT_ID}/topics/{TOPIC_ID}",
"streaming": True,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={"location": LOCATION},
dataflow_config={"location": LOCATION, "job_name": "start_python_job_streaming"},
)
# [END howto_operator_start_streaming_python_job]

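Both this streaming test and the first file add an explicit job_name to dataflow_config. If I read the provider correctly, the Dataflow hook normalizes the base name (underscores become hyphens) and appends a unique suffix by default, so a fixed name mainly makes the job predictable to find and stop. A hedged sketch using the provider's typed configuration object, equivalent to the plain dict in the hunk above:

    from airflow.providers.google.cloud.operators.dataflow import DataflowConfiguration

    # Equivalent to dataflow_config={"location": LOCATION, "job_name": "start_python_job_streaming"};
    # the submitted job name ends up as something like "start-python-job-streaming-<suffix>".
    dataflow_config = DataflowConfiguration(
        job_name="start_python_job_streaming",
        location="europe-west3",
    )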

This file was deleted. (Presumably the local resources/wordcount_debugging.py script the tests previously uploaded, which would account for most of the 205 deleted lines now that the script is read from the shared resources bucket.)

@@ -100,10 +100,8 @@
"data_item_labels": {
"test-labels-name": "test-labels-value",
},
"import_schema_uri": (
"gs://google-cloud-aiplatform/schema/dataset/ioformat/image_bounding_box_io_format_1.0.0.yaml"
),
"gcs_source": {"uris": ["gs://cloud-samples-data/vision/salads.csv"]},
"import_schema_uri": "image_classification_single_label_io_format_1.0.0.yaml",
"gcs_source": {"uris": [f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/vertex-ai/image-dataset-flowers.csv"]},
},
]
DATASET_TO_UPDATE = {"display_name": "test-name"}
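In the vertex_ai test, the dataset definition switches from image bounding-box annotation on the public salads.csv sample to single-label image classification on a flowers CSV in the test's own sample bucket. The removed lines spelled out the full schema URI; the replacement keeps only the YAML filename, so presumably the full URI is now assembled elsewhere in the file from the same public prefix the old lines used. A hypothetical sketch of that composition (the joining constant is not visible in this hunk):

    # Vertex AI publishes dataset import schemas under this well-known GCS
    # prefix (referenced verbatim by the removed lines); SCHEMA_BASE is a
    # hypothetical name for whatever constant the file actually uses.
    SCHEMA_BASE = "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
    import_schema_uri = SCHEMA_BASE + "image_classification_single_label_io_format_1.0.0.yaml"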
@@ -71,7 +71,7 @@
# Public bucket holding the sample data
BUCKET_NAME_SRC = "cloud-samples-data"
# Path to the data inside the public bucket
PATH_SRC = "vision/ocr/sign.jpg"
PATH_SRC = "vision/logo/google_logo.jpg"


with DAG(
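Finally, the vision test swaps its sample image from an OCR street-sign photo to the Google logo, which suggests the annotate-image request exercises logo detection rather than text detection. A sketch of the assumed wiring (the request itself is outside this hunk, and the feature choice and field spelling are guesses that depend on the google-cloud-vision client version):

    BUCKET_NAME_SRC = "cloud-samples-data"    # public bucket holding the sample data
    PATH_SRC = "vision/logo/google_logo.jpg"  # path to the data inside the public bucket

    # Hypothetical annotate request built from the two constants above.
    annotate_image_request = {
        "image": {"source": {"image_uri": f"gs://{BUCKET_NAME_SRC}/{PATH_SRC}"}},
        "features": [{"type_": "LOGO_DETECTION"}],  # "type_" in google-cloud-vision >= 2.0
    }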
