Fix system tests for Dataflow, Vision and Vertex_ai #29

Closed
wants to merge 1 commit into from
===== changed file =====
@@ -24,27 +24,24 @@

import os
from datetime import datetime
from pathlib import Path

from airflow.models.dag import DAG
from airflow.providers.apache.beam.hooks.beam import BeamRunnerType
from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator
from airflow.providers.google.cloud.operators.dataflow import DataflowStopJobOperator
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataflow_native_python"

RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

PYTHON_FILE_NAME = "wordcount_debugging.py"
GCS_TMP = f"gs://{BUCKET_NAME}/temp/"
GCS_STAGING = f"gs://{BUCKET_NAME}/staging/"
GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
GCS_PYTHON_SCRIPT = f"gs://{BUCKET_NAME}/{PYTHON_FILE_NAME}"
PYTHON_FILE_LOCAL_PATH = str(Path(__file__).parent / "resources" / PYTHON_FILE_NAME)
GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
LOCATION = "europe-west3"

default_args = {
@@ -64,13 +61,6 @@
) as dag:
create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)

upload_file = LocalFilesystemToGCSOperator(
task_id="upload_file_to_bucket",
src=PYTHON_FILE_LOCAL_PATH,
dst=PYTHON_FILE_NAME,
bucket=BUCKET_NAME,
)

# [START howto_operator_start_python_job]
start_python_job = BeamRunPythonPipelineOperator(
runner=BeamRunnerType.DataflowRunner,
@@ -80,10 +70,10 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={"location": LOCATION},
dataflow_config={"location": LOCATION, "job_name": "start_python_job"},
)
# [END howto_operator_start_python_job]

@@ -94,7 +84,7 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
)
@@ -114,7 +104,6 @@
(
# TEST SETUP
create_bucket
>> upload_file
# TEST BODY
>> start_python_job
>> start_python_job_local
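For orientation, the reworked example now reads the wordcount script straight from the shared resources bucket, so the per-run bucket only has to hold the job output and the upload_file task disappears. A minimal sketch of the resulting wiring, assuming the constants shown in the diff; the DAG arguments and the shortened setup/teardown are placeholders rather than the full system test.

import os
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.apache.beam.hooks.beam import BeamRunnerType
from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataflow_native_python"
RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
# The script is read from the shared bucket instead of being uploaded per run.
GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
LOCATION = "europe-west3"

with DAG(DAG_ID, schedule="@once", start_date=datetime(2021, 1, 1), catchup=False) as dag:
    create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)

    start_python_job = BeamRunPythonPipelineOperator(
        runner=BeamRunnerType.DataflowRunner,
        task_id="start_python_job",
        py_file=GCS_PYTHON_SCRIPT,  # no LocalFilesystemToGCSOperator step needed any more
        pipeline_options={"output": GCS_OUTPUT},
        py_requirements=["apache-beam[gcp]==2.47.0"],
        py_interpreter="python3",
        py_system_site_packages=False,
        dataflow_config={"location": LOCATION, "job_name": "start_python_job"},
    )

    # Teardown tasks (stop job, delete bucket) are omitted here.
    create_bucket >> start_python_job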
===== changed file =====
@@ -24,7 +24,6 @@

import os
from datetime import datetime
from pathlib import Path
from typing import Callable

from airflow.exceptions import AirflowException
@@ -39,20 +38,18 @@
DataflowJobMetricsSensor,
DataflowJobStatusSensor,
)
from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "dataflow_native_python_async"

RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

PYTHON_FILE_NAME = "wordcount_debugging.txt"
GCS_TMP = f"gs://{BUCKET_NAME}/temp/"
GCS_STAGING = f"gs://{BUCKET_NAME}/staging/"
GCS_OUTPUT = f"gs://{BUCKET_NAME}/output"
GCS_PYTHON_SCRIPT = f"gs://{BUCKET_NAME}/{PYTHON_FILE_NAME}"
PYTHON_FILE_LOCAL_PATH = str(Path(__file__).parent / "resources" / PYTHON_FILE_NAME)
GCS_PYTHON_SCRIPT = f"gs://{RESOURCE_DATA_BUCKET}/dataflow/python/wordcount_debugging.py"
LOCATION = "europe-west3"

default_args = {
@@ -72,13 +69,6 @@
) as dag:
create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)

upload_file = LocalFilesystemToGCSOperator(
task_id="upload_file_to_bucket",
src=PYTHON_FILE_LOCAL_PATH,
dst=PYTHON_FILE_NAME,
bucket=BUCKET_NAME,
)

# [START howto_operator_start_python_job_async]
start_python_job_async = BeamRunPythonPipelineOperator(
task_id="start_python_job_async",
@@ -88,7 +78,7 @@
pipeline_options={
"output": GCS_OUTPUT,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={
@@ -174,7 +164,6 @@ def check_autoscaling_event(autoscaling_events: list[dict]) -> bool:
(
# TEST SETUP
create_bucket
>> upload_file
# TEST BODY
>> start_python_job_async
>> [
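The async variant drops its upload step in the same way. For context, a hedged sketch of how such an asynchronously started job is typically watched with the Dataflow sensors imported in this file; the sensor classes come from the diff, while the XCom lookup of the job id, the expected status and the metrics callback are assumptions, not the example's exact code.

from __future__ import annotations

from airflow.providers.google.cloud.hooks.dataflow import DataflowJobStatus
from airflow.providers.google.cloud.sensors.dataflow import (
    DataflowJobMetricsSensor,
    DataflowJobStatusSensor,
)

LOCATION = "europe-west3"
# Assumed XCom wiring: the pipeline operator pushes the started Dataflow job id.
JOB_ID = "{{ task_instance.xcom_pull('start_python_job_async')['dataflow_job_id'] }}"

wait_for_python_job_async_done = DataflowJobStatusSensor(
    task_id="wait_for_python_job_async_done",
    job_id=JOB_ID,
    expected_statuses={DataflowJobStatus.JOB_STATE_DONE},
    location=LOCATION,
)

def check_metric_scalar_present(metrics: list[dict]) -> bool:
    """Assumed callback: succeed as soon as any scalar metric is reported."""
    return any("scalar" in metric for metric in metrics)

wait_for_python_job_async_metric = DataflowJobMetricsSensor(
    task_id="wait_for_python_job_async_metric",
    job_id=JOB_ID,
    location=LOCATION,
    callback=check_metric_scalar_present,
    fail_on_terminal_state=False,
)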
===== changed file =====
@@ -82,10 +82,10 @@
"output_topic": f"projects/{PROJECT_ID}/topics/{TOPIC_ID}",
"streaming": True,
},
py_requirements=["apache-beam[gcp]==2.46.0"],
py_requirements=["apache-beam[gcp]==2.47.0"],
py_interpreter="python3",
py_system_site_packages=False,
dataflow_config={"location": LOCATION},
dataflow_config={"location": LOCATION, "job_name": "start_python_job_streaming"},
)
# [END howto_operator_start_streaming_python_job]

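Besides the Beam version bump, this streaming example now pins an explicit job_name, presumably so the teardown can find the still-running streaming job by name and stop it. A sketch of such a stop task with the DataflowStopJobOperator that the first file already imports; the task id and trigger rule are placeholders.

from airflow.providers.google.cloud.operators.dataflow import DataflowStopJobOperator
from airflow.utils.trigger_rule import TriggerRule

LOCATION = "europe-west3"

stop_dataflow_job = DataflowStopJobOperator(
    task_id="stop_dataflow_job",
    location=LOCATION,
    # Matches the job_name set in dataflow_config above, so the streaming
    # job can be located by prefix even after Dataflow appends a suffix.
    job_name_prefix="start_python_job_streaming",
    trigger_rule=TriggerRule.ALL_DONE,  # tear down even if the test body failed
)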

===== changed file =====
This file was deleted.

===== changed file =====
@@ -100,10 +100,8 @@
"data_item_labels": {
"test-labels-name": "test-labels-value",
},
"import_schema_uri": (
"gs://google-cloud-aiplatform/schema/dataset/ioformat/image_bounding_box_io_format_1.0.0.yaml"
),
"gcs_source": {"uris": ["gs://cloud-samples-data/vision/salads.csv"]},
"import_schema_uri": "image_classification_single_label_io_format_1.0.0.yaml",
"gcs_source": {"uris": [f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/vertex-ai/image-dataset-flowers.csv"]},
},
]
DATASET_TO_UPDATE = {"display_name": "test-name"}
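For the Vertex AI image dataset, the import config now uses the single label classification schema and a flowers CSV staged in the test's own sample bucket instead of the public salads CSV. A hedged sketch of how such a config is usually passed to the provider's dataset import operator; the operator name matches the google provider's vertex_ai dataset operators, but the project, region, bucket and dataset id values here are placeholders.

import os

from airflow.providers.google.cloud.operators.vertex_ai.dataset import ImportDataOperator

PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "example-project")  # assumed env variable
REGION = "us-central1"  # placeholder region
DATA_SAMPLE_GCS_BUCKET_NAME = "bucket_example_vertex_ai"  # placeholder; the real test derives this from ENV_ID

import_image_data = ImportDataOperator(
    task_id="import_image_data",
    project_id=PROJECT_ID,
    region=REGION,
    # In the real test the dataset id is assumed to come from the create-dataset task's XCom.
    dataset_id="IMAGE_DATASET_ID",
    import_configs=[
        {
            "data_item_labels": {"test-labels-name": "test-labels-value"},
            "import_schema_uri": "image_classification_single_label_io_format_1.0.0.yaml",
            "gcs_source": {
                "uris": [f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/vertex-ai/image-dataset-flowers.csv"]
            },
        }
    ],
)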
===== changed file =====
@@ -71,7 +71,7 @@
# Public bucket holding the sample data
BUCKET_NAME_SRC = "cloud-samples-data"
# Path to the data inside the public bucket
PATH_SRC = "vision/ocr/sign.jpg"
PATH_SRC = "vision/logo/google_logo.jpg"


with DAG(
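The Vision test swaps the OCR sign photo for the google_logo.jpg sample. A minimal sketch of how a public sample like this is typically copied into the test's own bucket before the annotate tasks run; only the source bucket and path come from the diff, while the destination bucket, object name and the use of GCSToGCSOperator are assumptions.

from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator

# Public bucket holding the sample data (from the diff)
BUCKET_NAME_SRC = "cloud-samples-data"
PATH_SRC = "vision/logo/google_logo.jpg"
# Destination bucket created by the test; placeholder name
BUCKET_NAME_DST = "bucket_example_vision_annotate_image"

copy_single_file = GCSToGCSOperator(
    task_id="copy_single_gcs_file",
    source_bucket=BUCKET_NAME_SRC,
    source_object=PATH_SRC,
    destination_bucket=BUCKET_NAME_DST,
    destination_object="image.jpg",  # name the annotate tasks are assumed to reference
)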