From 739f335aec06154ada15c5f54ed57665ced9bf75 Mon Sep 17 00:00:00 2001 From: anand Date: Tue, 26 Mar 2024 13:49:55 -0500 Subject: [PATCH 01/28] first pass at operator --- .../compat/starship_operator.py | 145 ++++++++++++++++++ astronomer_starship/starship_api.py | 50 +++--- 2 files changed, 172 insertions(+), 23 deletions(-) create mode 100644 astronomer_starship/compat/starship_operator.py diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/compat/starship_operator.py new file mode 100644 index 0000000..fceb956 --- /dev/null +++ b/astronomer_starship/compat/starship_operator.py @@ -0,0 +1,145 @@ +from typing import Literal +from logging import getLogger +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +from urllib.parse import urljoin + +from airflow.models.baseoperator import BaseOperator +from airflow.utils.state import DagRunState + + +SOURCE_URL = "XXXXX" +ASTRO_URL = "XXXXX" +ASTRO_API_TOKEN = "XXXXX" + +DAG_RUNS = "/api/starship/dag_runs" +TASK_INSTANCES = "/api/starship/task_instances" +DAGS = "/api/starship/dags" + +logger = getLogger(__name__) + + +def session_with_retry(retries=3, backoff_factor=2): + sess = requests.Session() + retry = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=[500, 502, 503, 504], + ) + sess.mount("http://", HTTPAdapter(max_retries=retry)) + sess.mount("https://", HTTPAdapter(max_retries=retry)) + return sess + + +def _request( + type: Literal["get", "post", "put", "patch"], + endpoint, + auth, + json=None, + params=None, + retries=3, + backoff_factor=2, +): + s = session_with_retry(retries=retries, backoff_factor=backoff_factor) + request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch} + method = request_mapping.get(type) + resp = method(endpoint, params=params, json=json, auth=auth) + logger.info(f"request status {resp.status_code} for endpoint {endpoint}") + return resp + + +# todo: maybe create utility classes? 
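+# A usage sketch for the helpers above (hypothetical localhost URL and
+# credentials; `auth` is anything `requests` accepts, e.g. a basic-auth tuple):
+#
+#   resp = _request(
+#       type="get",
+#       endpoint=urljoin("http://localhost:8080", DAGS),
+#       auth=("admin", "admin"),
+#   )
+#   resp.raise_for_status()
+#   dags = resp.json()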
+def get_dags(webserver_url, auth): + dags = urljoin(webserver_url, DAGS) + resp = _request("get", endpoint=dags, auth=auth) + return resp.json() + + +def get_dagruns(webserver_url, dag_id, auth, limit=5) -> dict: + dagrun_endpoint = urljoin(webserver_url, DAG_RUNS) + resp = _request( + type="get", + endpoint=dagrun_endpoint, + auth=auth, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp.json() + + +def set_dagruns(webserver_url: str, auth, dag_runs: list[dict]) -> dict: + dagrun_endpoint = urljoin(webserver_url, DAG_RUNS) + resp = _request( + type="post", endpoint=dagrun_endpoint, auth=auth, json={"dag_runs": dag_runs} + ) + return resp.json() + + +def get_latest_dagrun_state(webserver_url: str, dag_id: str, auth: str) -> str: + latest = get_dagruns(webserver_url=webserver_url, dag_id=dag_id, auth=auth, limit=1) + if latest.status_code != 200: + raise Exception( + f"Retriveing latest dagrun failed with status: {latest.status_code} {latest.text}" + ) + + return latest[0]["state"] + + +# another reason for class to couple dagrun and task instance retrieval limits +def get_task_instances( + webserver_url: str, dag_id: str, auth: str, limit: int = 5 +) -> requests.Response: + task_instances = urljoin(webserver_url, TASK_INSTANCES) + resp = _request( + type="get", + endpoint=task_instances, + auth=auth, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp + + +def set_dag_state( + webserver_url: str, + dag_id: str, + auth, + action=Literal["pause", "unpause"], +): + action_dict = {"pause": True, "unpause": False} + is_paused = action_dict[action] + payload = {"dag_id": dag_id, "is_paused": is_paused} + dag_endpoint = urljoin(webserver_url, DAGS) + return _request(type="patch", endpoint=dag_endpoint, auth=auth, json=payload) + + +def load_dagruns_to_target(source_url, target_url, dag_id, source_auth, target_auth): + state = get_latest_dagrun_state(webserver_url=source_url, dag_id=dag_id) + if state not in (DagRunState.FAILED, DagRunState.SUCCESS): + logger.info( + f"Latest dagrun for {dag_id} is not not in state {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration." 
+ ) + else: + set_dag_state( + webserver_url=source_url, dag_id=dag_id, action="pause", auth=source_auth + ) + dagruns = get_dagruns(webserver_url=source_url, dag_id=dag_id, auth=source_auth) + set_dagruns( + webserver_url=target_url, dag_runs=dagruns["dag_runs"], auth=target_auth + ) + + +class StarshipOperator(BaseOperator): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def execute(self, context): + conf = context["conf"].as_dict() + all_dags = get_dags(webserver_url=conf["source_url"], auth=conf["source_auth"]) + for dag in all_dags: + load_dagruns_to_target( + dag_id=dag["dag_id"], + source_url=conf["source_url"], + source_auth=conf["source_auth"], + target_url=conf["target_url"], + target_auth=conf["target_auth"], + ) diff --git a/astronomer_starship/starship_api.py b/astronomer_starship/starship_api.py index 3d61716..b7883c3 100644 --- a/astronomer_starship/starship_api.py +++ b/astronomer_starship/starship_api.py @@ -30,9 +30,9 @@ def starship_route( kwargs = ( kwargs_fn( request_method=request_method, - args=request.args - if request_method in ["GET", "POST", "DELETE"] - else {}, + args=( + request.args if request_method in ["GET", "POST", "DELETE"] else {} + ), json=(request.json if request.is_json else {}), ) if kwargs_fn @@ -404,26 +404,30 @@ def dag_runs(self): **Response**: ```json - [ - { - "dag_id": "dag_0", - "queued_at": "1970-01-01T00:00:00+00:00", - "execution_date": "1970-01-01T00:00:00+00:00", - "start_date": "1970-01-01T00:00:00+00:00", - "end_date": "1970-01-01T00:00:00+00:00", - "state": "SUCCESS", - "run_id": "manual__1970-01-01T00:00:00+00:00", - "creating_job_id": 123, - "external_trigger": true, - "run_type": "manual", - "conf": None, - "data_interval_start": "1970-01-01T00:00:00+00:00", - "data_interval_end": "1970-01-01T00:00:00+00:00", - "last_scheduling_decision": "1970-01-01T00:00:00+00:00", - "dag_hash": "...." - }, - ... - ] + { + "dag_run_count": 1, + "dag_runs": + [ + { + "dag_id": "dag_0", + "queued_at": "1970-01-01T00:00:00+00:00", + "execution_date": "1970-01-01T00:00:00+00:00", + "start_date": "1970-01-01T00:00:00+00:00", + "end_date": "1970-01-01T00:00:00+00:00", + "state": "SUCCESS", + "run_id": "manual__1970-01-01T00:00:00+00:00", + "creating_job_id": 123, + "external_trigger": true, + "run_type": "manual", + "conf": None, + "data_interval_start": "1970-01-01T00:00:00+00:00", + "data_interval_end": "1970-01-01T00:00:00+00:00", + "last_scheduling_decision": "1970-01-01T00:00:00+00:00", + "dag_hash": "...." + }, + ... 
+ ] + } ``` ### `POST /api/starship/dag_runs` From 64337c5c48a5749cc6cfaeeff55d052d380e9231 Mon Sep 17 00:00:00 2001 From: anand Date: Tue, 26 Mar 2024 13:57:57 -0500 Subject: [PATCH 02/28] remove unused constants --- astronomer_starship/compat/starship_operator.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/compat/starship_operator.py index fceb956..c33bf3b 100644 --- a/astronomer_starship/compat/starship_operator.py +++ b/astronomer_starship/compat/starship_operator.py @@ -9,10 +9,6 @@ from airflow.utils.state import DagRunState -SOURCE_URL = "XXXXX" -ASTRO_URL = "XXXXX" -ASTRO_API_TOKEN = "XXXXX" - DAG_RUNS = "/api/starship/dag_runs" TASK_INSTANCES = "/api/starship/task_instances" DAGS = "/api/starship/dags" From f96c355f505d89b08d40bafcab099a4513da2cf3 Mon Sep 17 00:00:00 2001 From: anand Date: Fri, 29 Mar 2024 14:17:31 -0500 Subject: [PATCH 03/28] starship dagrun migration hook --- astronomer_starship/compat/starship_hook.py | 184 ++++++++++++++++++ .../compat/starship_operator.py | 142 +------------- 2 files changed, 193 insertions(+), 133 deletions(-) create mode 100644 astronomer_starship/compat/starship_hook.py diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py new file mode 100644 index 0000000..993d303 --- /dev/null +++ b/astronomer_starship/compat/starship_hook.py @@ -0,0 +1,184 @@ +from typing import Literal +from logging import getLogger +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +from urllib.parse import urljoin +from airflow.utils.state import DagRunState +from airflow.hooks.base import BaseHook + + +logger = getLogger(__name__) + + +def session_with_retry(retries=3, backoff_factor=2): + sess = requests.Session() + retry = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=[500, 502, 503, 504], + ) + sess.mount("http://", HTTPAdapter(max_retries=retry)) + sess.mount("https://", HTTPAdapter(max_retries=retry)) + return sess + + +def _request( + type: Literal["get", "post", "put", "patch"], + endpoint, + auth=None, + json=None, + params=None, + headers=None, + retries=3, + backoff_factor=2, +): + s = session_with_retry(retries=retries, backoff_factor=backoff_factor) + request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch} + method = request_mapping.get(type) + resp = method(endpoint, params=params, json=json, auth=auth, headers=headers) + logger.info(f"request status {resp.status_code} for endpoint {endpoint}") + return resp + + +class StarshipAPIHook(BaseHook): + + DAG_RUNS = "/api/starship/dag_runs" + TASK_INSTANCES = "/api/starship/task_instances" + DAGS = "/api/starship/dags" + + def __init__( + self, + webserver_url, + auth=None, + headers=None, + logger_name: str | None = None, + ): + super().__init__(logger_name) + self.webserver_url = webserver_url + self.auth = auth + self.headers = headers + + # todo: maybe create utility classes? 
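+    # A usage sketch (hypothetical URLs and credentials): one hook per
+    # webserver; `auth` and `headers` are forwarded on every request, so a
+    # token-auth deployment can pass a bearer header instead of a basic-auth tuple:
+    #
+    #   source = StarshipAPIHook(webserver_url="http://localhost:8080", auth=("admin", "admin"))
+    #   target = StarshipAPIHook(
+    #       webserver_url="https://deployment.example.com",
+    #       headers={"Authorization": "Bearer <token>"},
+    #   )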
+ def get_dags(self): + dags = urljoin(self.webserver_url, StarshipAPIHook.DAGS) + resp = _request("get", endpoint=dags, auth=self.auth, headers=self.headers) + return resp.json() + + def get_dagruns(self, dag_id, limit=5) -> dict: + dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) + resp = _request( + type="get", + endpoint=dagrun_endpoint, + auth=self.auth, + headers=self.headers, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp.json() + + def set_dagruns( + self, + dag_runs: list[dict], + ) -> dict: + dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) + resp = _request( + type="post", + endpoint=dagrun_endpoint, + auth=self.auth, + headers=self.headers, + json={"dag_runs": dag_runs}, + ) + return resp.json() + + def get_latest_dagrun_state(self, dag_id) -> str: + latest = self.get_dagruns( + webserver_url=self.webserver_url, + dag_id=dag_id, + auth=self.auth, + headers=self.headers, + limit=1, + ) + if latest.status_code != 200: + raise Exception( + f"Retriveing latest dagrun failed with status: {latest.status_code} {latest.text}" + ) + + return latest[0]["state"] + + # another reason for class to couple dagrun and task instance retrieval limits + def get_task_instances( + self, + dag_id: str, + limit: int = 5, + ) -> requests.Response: + task_instances = urljoin(self.webserver_url, StarshipAPIHook.TASK_INSTANCES) + resp = _request( + type="get", + endpoint=task_instances, + auth=self.auth, + headers=self.headers, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp + + def set_dag_state( + self, + dag_id: str, + action=Literal["pause", "unpause"], + ): + action_dict = {"pause": True, "unpause": False} + is_paused = action_dict[action] + payload = {"dag_id": dag_id, "is_paused": is_paused} + dag_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAGS) + return _request( + type="patch", + endpoint=dag_endpoint, + auth=self.auth, + headers=self.headers, + json=payload, + ) + + +class StarshipDagRunMigrationHook(BaseHook): + + def __init__( + self, + source_webserver_url: str, + target_webserver_url: str, + source_auth: tuple = None, + target_auth: tuple = None, + source_headers: dict = None, + target_headers: dict = None, + logger_name: str | None = None, + ): + super().__init__(logger_name) + + self.source_api_hook = StarshipAPIHook( + webserver_url=source_webserver_url, auth=source_auth, headers=source_headers + ) + self.target_api_hook = StarshipAPIHook( + webserver_url=target_webserver_url, auth=target_auth, headers=target_headers + ) + + def load_dagruns_to_target( + self, + dag_ids: list[str] = None, + ): + if not dag_ids: + dag_ids = self.source_api_hook.get_dags() + + for dag_id in dag_ids: + state = self.source_api_hook.get_latest_dagrun_state(dag_id=dag_id) + if state not in (DagRunState.FAILED, DagRunState.SUCCESS): + logger.info( + f"Latest dagrun for {dag_id} is not not in state {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration." 
+ ) + else: + self.source_api_hook.set_dag_state( + dag_id=dag_id, + action="pause", + ) + dag_runs = self.source_api_hook.get_dagruns( + dag_id=dag_id, + ) + self.target_api_hook.set_dagruns(dag_runs=dag_runs["dag_runs"]) diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/compat/starship_operator.py index c33bf3b..ee9fbe5 100644 --- a/astronomer_starship/compat/starship_operator.py +++ b/astronomer_starship/compat/starship_operator.py @@ -1,141 +1,17 @@ -from typing import Literal -from logging import getLogger -import requests -from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry -from urllib.parse import urljoin - from airflow.models.baseoperator import BaseOperator -from airflow.utils.state import DagRunState - - -DAG_RUNS = "/api/starship/dag_runs" -TASK_INSTANCES = "/api/starship/task_instances" -DAGS = "/api/starship/dags" - -logger = getLogger(__name__) - - -def session_with_retry(retries=3, backoff_factor=2): - sess = requests.Session() - retry = Retry( - total=retries, - backoff_factor=backoff_factor, - status_forcelist=[500, 502, 503, 504], - ) - sess.mount("http://", HTTPAdapter(max_retries=retry)) - sess.mount("https://", HTTPAdapter(max_retries=retry)) - return sess - - -def _request( - type: Literal["get", "post", "put", "patch"], - endpoint, - auth, - json=None, - params=None, - retries=3, - backoff_factor=2, -): - s = session_with_retry(retries=retries, backoff_factor=backoff_factor) - request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch} - method = request_mapping.get(type) - resp = method(endpoint, params=params, json=json, auth=auth) - logger.info(f"request status {resp.status_code} for endpoint {endpoint}") - return resp - - -# todo: maybe create utility classes? 
-def get_dags(webserver_url, auth): - dags = urljoin(webserver_url, DAGS) - resp = _request("get", endpoint=dags, auth=auth) - return resp.json() - - -def get_dagruns(webserver_url, dag_id, auth, limit=5) -> dict: - dagrun_endpoint = urljoin(webserver_url, DAG_RUNS) - resp = _request( - type="get", - endpoint=dagrun_endpoint, - auth=auth, - params={"dag_id": dag_id, "limit": limit}, - ) - return resp.json() - -def set_dagruns(webserver_url: str, auth, dag_runs: list[dict]) -> dict: - dagrun_endpoint = urljoin(webserver_url, DAG_RUNS) - resp = _request( - type="post", endpoint=dagrun_endpoint, auth=auth, json={"dag_runs": dag_runs} - ) - return resp.json() - - -def get_latest_dagrun_state(webserver_url: str, dag_id: str, auth: str) -> str: - latest = get_dagruns(webserver_url=webserver_url, dag_id=dag_id, auth=auth, limit=1) - if latest.status_code != 200: - raise Exception( - f"Retriveing latest dagrun failed with status: {latest.status_code} {latest.text}" - ) - - return latest[0]["state"] - - -# another reason for class to couple dagrun and task instance retrieval limits -def get_task_instances( - webserver_url: str, dag_id: str, auth: str, limit: int = 5 -) -> requests.Response: - task_instances = urljoin(webserver_url, TASK_INSTANCES) - resp = _request( - type="get", - endpoint=task_instances, - auth=auth, - params={"dag_id": dag_id, "limit": limit}, - ) - return resp - - -def set_dag_state( - webserver_url: str, - dag_id: str, - auth, - action=Literal["pause", "unpause"], -): - action_dict = {"pause": True, "unpause": False} - is_paused = action_dict[action] - payload = {"dag_id": dag_id, "is_paused": is_paused} - dag_endpoint = urljoin(webserver_url, DAGS) - return _request(type="patch", endpoint=dag_endpoint, auth=auth, json=payload) - - -def load_dagruns_to_target(source_url, target_url, dag_id, source_auth, target_auth): - state = get_latest_dagrun_state(webserver_url=source_url, dag_id=dag_id) - if state not in (DagRunState.FAILED, DagRunState.SUCCESS): - logger.info( - f"Latest dagrun for {dag_id} is not not in state {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration." 
- ) - else: - set_dag_state( - webserver_url=source_url, dag_id=dag_id, action="pause", auth=source_auth - ) - dagruns = get_dagruns(webserver_url=source_url, dag_id=dag_id, auth=source_auth) - set_dagruns( - webserver_url=target_url, dag_runs=dagruns["dag_runs"], auth=target_auth - ) +from astronomer_starship.compat.starship_hook import StarshipDagRunMigrationHook class StarshipOperator(BaseOperator): - def __init__(self, *args, **kwargs): + def __init__(self, hook: StarshipDagRunMigrationHook = None, *args, **kwargs): super().__init__(*args, **kwargs) + self.hook = hook def execute(self, context): - conf = context["conf"].as_dict() - all_dags = get_dags(webserver_url=conf["source_url"], auth=conf["source_auth"]) - for dag in all_dags: - load_dagruns_to_target( - dag_id=dag["dag_id"], - source_url=conf["source_url"], - source_auth=conf["source_auth"], - target_url=conf["target_url"], - target_auth=conf["target_auth"], - ) + ctx = context["conf"].as_dict() + + if not self.hook: + self.hook = StarshipDagRunMigrationHook(**ctx) + + return self.hook.load_dagruns_to_target(dag_ids=ctx.get("dag_ids")) From 1e84ca8e57d4befc9c60907b1f77dde88e16e86d Mon Sep 17 00:00:00 2001 From: anand Date: Mon, 1 Apr 2024 09:14:03 -0500 Subject: [PATCH 04/28] Migration hook methods for loading dagruns and TIs --- astronomer_starship/compat/starship_hook.py | 33 +++++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py index 993d303..10ac5f2 100644 --- a/astronomer_starship/compat/starship_hook.py +++ b/astronomer_starship/compat/starship_hook.py @@ -110,7 +110,7 @@ def get_task_instances( self, dag_id: str, limit: int = 5, - ) -> requests.Response: + ) -> dict: task_instances = urljoin(self.webserver_url, StarshipAPIHook.TASK_INSTANCES) resp = _request( type="get", @@ -119,7 +119,20 @@ def get_task_instances( headers=self.headers, params={"dag_id": dag_id, "limit": limit}, ) - return resp + return resp.json() + + def set_task_instances(self, task_instances: list[dict]): + task_instance_endpoint = urljoin( + self.webserver_url, StarshipAPIHook.TASK_INSTANCES + ) + resp = _request( + type="post", + endpoint=task_instance_endpoint, + auth=self.auth, + headers=self.headers, + json={"task_instances": task_instances}, + ) + return resp.json() def set_dag_state( self, @@ -178,7 +191,15 @@ def load_dagruns_to_target( dag_id=dag_id, action="pause", ) - dag_runs = self.source_api_hook.get_dagruns( - dag_id=dag_id, - ) - self.target_api_hook.set_dagruns(dag_runs=dag_runs["dag_runs"]) + self.get_and_set_dagruns(dag_id) + self.get_and_set_task_instances(dag_id) + + def get_and_set_dagruns(self, dag_id): + dag_runs = self.source_api_hook.get_dagruns( + dag_id=dag_id, + ) + self.target_api_hook.set_dagruns(dag_runs=dag_runs["dag_runs"]) + + def get_and_set_task_instances(self, dag_id): + task_instances = self.source_api_hook.get_task_instances(dag_id=dag_id) + self.target_api_hook.set_task_instances(task_instances=task_instances) From 90a7111e1270eb6e9ad5a6db53463b479c513c1c Mon Sep 17 00:00:00 2001 From: anand Date: Mon, 1 Apr 2024 09:19:14 -0500 Subject: [PATCH 05/28] version bump --- astronomer_starship/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astronomer_starship/__init__.py b/astronomer_starship/__init__.py index 58c1905..1abd8a1 100644 --- a/astronomer_starship/__init__.py +++ b/astronomer_starship/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.1" +__version__ = "2.0.3" 
def get_provider_info():

From 8c25b2343997f1e14f433d419512dc425232c3b3 Mon Sep 17 00:00:00 2001
From: anand
Date: Mon, 1 Apr 2024 10:00:59 -0500
Subject: [PATCH 06/28] precommit fixes

---
 astronomer_starship/compat/starship_hook.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py
index 10ac5f2..0b58694 100644
--- a/astronomer_starship/compat/starship_hook.py
+++ b/astronomer_starship/compat/starship_hook.py
@@ -1,9 +1,11 @@
-from typing import Literal
 from logging import getLogger
 import requests
 from requests.adapters import HTTPAdapter
+from typing import Literal
+from textwrap import dedent
 from urllib3.util.retry import Retry
 from urllib.parse import urljoin
+
 from airflow.utils.state import DagRunState
 from airflow.hooks.base import BaseHook
 
@@ -42,7 +44,6 @@ def _request(
 
 
 class StarshipAPIHook(BaseHook):
-
     DAG_RUNS = "/api/starship/dag_runs"
     TASK_INSTANCES = "/api/starship/task_instances"
     DAGS = "/api/starship/dags"
@@ -153,7 +154,6 @@ def set_dag_state(
 
 
 class StarshipDagRunMigrationHook(BaseHook):
-
     def __init__(
         self,
         source_webserver_url: str,
@@ -184,7 +184,10 @@ def load_dagruns_to_target(
             state = self.source_api_hook.get_latest_dagrun_state(dag_id=dag_id)
             if state not in (DagRunState.FAILED, DagRunState.SUCCESS):
                 logger.info(
-                    f"Latest dagrun for {dag_id} is not not in state {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration."
+                    dedent(
+                        f"""Latest dagrun for {dag_id} is not in state
+                        {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration."""
+                    )
                 )
             else:
                 self.source_api_hook.set_dag_state(

From 062d14e4b4f4510f5cfe68ff863524dd059d8f43 Mon Sep 17 00:00:00 2001
From: anand
Date: Wed, 10 Apr 2024 18:17:30 -0500
Subject: [PATCH 07/28] tests and fixes

---
 astronomer_starship/compat/starship_hook.py   |  56 ++++-----
 .../compat/starship_operator.py               |   6 +-
 tests/operator_test.py                        | 106 ++++++++++++++++++
 3 files changed, 139 insertions(+), 29 deletions(-)
 create mode 100644 tests/operator_test.py

diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py
index 0b58694..55d4d9a 100644
--- a/astronomer_starship/compat/starship_hook.py
+++ b/astronomer_starship/compat/starship_hook.py
@@ -4,7 +4,6 @@
 from typing import Literal
 from textwrap import dedent
 from urllib3.util.retry import Retry
-from urllib.parse import urljoin
 
 from airflow.utils.state import DagRunState
 from airflow.hooks.base import BaseHook
@@ -13,6 +12,10 @@
 logger = getLogger(__name__)
 
 
+def urljoin(base: str, endpoint: str) -> str:
+    return "/".join((base.rstrip("/"), endpoint.lstrip("/")))
+
+
 def session_with_retry(retries=3, backoff_factor=2):
     sess = requests.Session()
     retry = Retry(
@@ -38,15 +41,19 @@ def _request(
     s = session_with_retry(retries=retries, backoff_factor=backoff_factor)
     request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch}
     method = request_mapping.get(type)
+    if auth:
+        auth = tuple(auth)
     resp = method(endpoint, params=params, json=json, auth=auth, headers=headers)
-    logger.info(f"request status {resp.status_code} for endpoint {endpoint}")
+    logger.info(
+        f"request status {resp.status_code} for {type} on endpoint {endpoint} with text {resp.text}"
+    )
     return resp
 
 
 class StarshipAPIHook(BaseHook):
-    DAG_RUNS = "/api/starship/dag_runs"
-    TASK_INSTANCES = "/api/starship/task_instances"
-    DAGS = "/api/starship/dags"
+    DAG_RUNS = "api/starship/dag_runs"
+    TASK_INSTANCES = 
"api/starship/task_instances" + DAGS = "api/starship/dags" def __init__( self, @@ -61,12 +68,12 @@ def __init__( self.headers = headers # todo: maybe create utility classes? - def get_dags(self): + def get_dags(self) -> dict: dags = urljoin(self.webserver_url, StarshipAPIHook.DAGS) resp = _request("get", endpoint=dags, auth=self.auth, headers=self.headers) return resp.json() - def get_dagruns(self, dag_id, limit=5) -> dict: + def get_dag_runs(self, dag_id, limit=5) -> dict: dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) resp = _request( type="get", @@ -77,7 +84,7 @@ def get_dagruns(self, dag_id, limit=5) -> dict: ) return resp.json() - def set_dagruns( + def set_dag_runs( self, dag_runs: list[dict], ) -> dict: @@ -92,19 +99,14 @@ def set_dagruns( return resp.json() def get_latest_dagrun_state(self, dag_id) -> str: - latest = self.get_dagruns( - webserver_url=self.webserver_url, + latest = self.get_dag_runs( dag_id=dag_id, - auth=self.auth, - headers=self.headers, limit=1, ) - if latest.status_code != 200: - raise Exception( - f"Retriveing latest dagrun failed with status: {latest.status_code} {latest.text}" - ) + logger.info(f"fetching latest dagrun for {dag_id}") + logger.info(f"{latest}") - return latest[0]["state"] + return latest["dag_runs"][0]["state"] # another reason for class to couple dagrun and task instance retrieval limits def get_task_instances( @@ -122,7 +124,7 @@ def get_task_instances( ) return resp.json() - def set_task_instances(self, task_instances: list[dict]): + def set_task_instances(self, task_instances: list[dict]) -> dict: task_instance_endpoint = urljoin( self.webserver_url, StarshipAPIHook.TASK_INSTANCES ) @@ -139,7 +141,7 @@ def set_dag_state( self, dag_id: str, action=Literal["pause", "unpause"], - ): + ) -> requests.Response: action_dict = {"pause": True, "unpause": False} is_paused = action_dict[action] payload = {"dag_id": dag_id, "is_paused": is_paused} @@ -176,9 +178,9 @@ def __init__( def load_dagruns_to_target( self, dag_ids: list[str] = None, - ): + ) -> None: if not dag_ids: - dag_ids = self.source_api_hook.get_dags() + dag_ids = [dag["dag_id"] for dag in self.source_api_hook.get_dags()] for dag_id in dag_ids: state = self.source_api_hook.get_latest_dagrun_state(dag_id=dag_id) @@ -197,12 +199,14 @@ def load_dagruns_to_target( self.get_and_set_dagruns(dag_id) self.get_and_set_task_instances(dag_id) - def get_and_set_dagruns(self, dag_id): - dag_runs = self.source_api_hook.get_dagruns( + def get_and_set_dagruns(self, dag_id: str) -> None: + dag_runs = self.source_api_hook.get_dag_runs( dag_id=dag_id, ) - self.target_api_hook.set_dagruns(dag_runs=dag_runs["dag_runs"]) + self.target_api_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"]) - def get_and_set_task_instances(self, dag_id): + def get_and_set_task_instances(self, dag_id: str) -> None: task_instances = self.source_api_hook.get_task_instances(dag_id=dag_id) - self.target_api_hook.set_task_instances(task_instances=task_instances) + self.target_api_hook.set_task_instances( + task_instances=task_instances["task_instances"] + ) diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/compat/starship_operator.py index ee9fbe5..e3593c6 100644 --- a/astronomer_starship/compat/starship_operator.py +++ b/astronomer_starship/compat/starship_operator.py @@ -9,9 +9,9 @@ def __init__(self, hook: StarshipDagRunMigrationHook = None, *args, **kwargs): self.hook = hook def execute(self, context): - ctx = context["conf"].as_dict() + conf = context["conf"] if not 
self.hook: - self.hook = StarshipDagRunMigrationHook(**ctx) + self.hook = StarshipDagRunMigrationHook(**conf) - return self.hook.load_dagruns_to_target(dag_ids=ctx.get("dag_ids")) + return self.hook.load_dagruns_to_target(dag_ids=conf.get("dag_ids")) diff --git a/tests/operator_test.py b/tests/operator_test.py new file mode 100644 index 0000000..43a35b9 --- /dev/null +++ b/tests/operator_test.py @@ -0,0 +1,106 @@ +import datetime +import os +from airflow import DAG +import pytest +from astronomer_starship.compat.starship_compatability import ( + StarshipAirflow, + get_test_data, +) +from astronomer_starship.compat.starship_hook import StarshipAPIHook +from astronomer_starship.compat.starship_operator import StarshipOperator +from tests.conftest import manual_tests +from tests.api_integration_test import ( + get_extras, +) + + +@pytest.fixture +def starship_hook_and_starship( + url_and_token_and_starship, +) -> tuple[StarshipAPIHook, StarshipAirflow]: + (url, token, starship) = url_and_token_and_starship + return StarshipAPIHook(webserver_url=url, **get_extras(url, token)), starship + + +def get_json_test_data(attrs, method=None): + test_data = get_test_data(attrs=attrs, method=method) + json_serializable = { + k: v if not isinstance(v, datetime.datetime) else v.isoformat() + for k, v in test_data.items() + } + return json_serializable + + +class TestStarshipApiHook: + @manual_tests + def test_get_dags(self, starship_hook_and_starship): + hook, _ = starship_hook_and_starship + dags = hook.get_dags() + assert len(dags) > 0, dags + + @manual_tests + def test_set_and_get_dag_runs(self, starship_hook_and_starship): + hook, starship = starship_hook_and_starship + post_payload = get_json_test_data(method="POST", attrs=starship.dag_run_attrs()) + set_runs = hook.set_dag_runs(dag_runs=[post_payload]) + assert set_runs + assert ( + "dag_id" in set_runs + or set_runs["error"] == "Integrity Error (Duplicate Record?)" + ) + get_runs = hook.get_dag_runs(dag_id=post_payload["dag_id"]) + assert get_runs, get_runs == post_payload + + @manual_tests + def test_set_and_get_task_instances(self, starship_hook_and_starship): + hook, starship = starship_hook_and_starship + post_payload = get_json_test_data( + method="POST", attrs=starship.task_instance_attrs() + ) + set_tis = hook.set_task_instances(task_instances=[post_payload]) + assert set_tis + assert ( + "task_instances" in set_tis + or set_tis["error"] == "Integrity Error (Duplicate Record?)" + ) + get_tis = hook.get_task_instances(dag_id=post_payload["dag_id"], limit=1) + assert "dag_run_count" in get_tis, get_tis + assert len(get_tis["task_instances"]) == 1 + + @manual_tests + @pytest.mark.parametrize("action", ["unpause", "pause"]) + def test_patch_dag_state(self, starship_hook_and_starship, action): + hook, _ = starship_hook_and_starship + example_dag = hook.get_dags()[0]["dag_id"] + resp = hook.set_dag_state(dag_id=example_dag, action=action) + assert resp.status_code == 200, "dag_id" in resp.json() + + @manual_tests + def test_get_latest_dagrun_state(self, starship_hook_and_starship): + hook, starship = starship_hook_and_starship + example_dag_run = get_test_data(starship.dag_run_attrs()) + latest_state = hook.get_latest_dagrun_state(dag_id=example_dag_run["dag_id"]) + assert latest_state == example_dag_run["state"] + + +@manual_tests +def test_starship_migration_operator(): + dag = DAG("test_dag", default_args={}) + starship_operator = StarshipOperator( + task_id="test_operator", + dag=dag, + ) + dagrun_conf = { + "source_webserver_url": 
"http://localhost:8080", + "source_auth": ["admin", "admin"], + "target_webserver_url": os.getenv("TARGET_WEBSERVER_URL"), + "target_headers": ( + {"Authorization": f"Bearer {os.getenv('TARGET_TOKEN')}"} + if os.getenv("TARGET_TOKEN") + else None + ), + "target_auth": os.getenv("TARGET_AUTH"), + } + + ctx = {"conf": dagrun_conf} + starship_operator.execute(ctx) From 303ed9052d50f144215b750f7b0b728889b31968 Mon Sep 17 00:00:00 2001 From: anand Date: Mon, 22 Apr 2024 16:13:57 -0500 Subject: [PATCH 08/28] logging and docs --- astronomer_starship/compat/starship_hook.py | 7 ++++--- astronomer_starship/compat/starship_operator.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py index 55d4d9a..d7e30f5 100644 --- a/astronomer_starship/compat/starship_hook.py +++ b/astronomer_starship/compat/starship_hook.py @@ -44,9 +44,10 @@ def _request( if auth: auth = tuple(auth) resp = method(endpoint, params=params, json=json, auth=auth, headers=headers) - logger.info( - f"request status {resp.status_code} for {type} on endpoint {endpoint} with text {resp.text}" - ) + if resp.status_code != 200: + logger.info( + f"request failed with status {resp.status_code} for {type} on endpoint {endpoint} with text {resp.text}" + ) return resp diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/compat/starship_operator.py index e3593c6..824090d 100644 --- a/astronomer_starship/compat/starship_operator.py +++ b/astronomer_starship/compat/starship_operator.py @@ -4,6 +4,10 @@ class StarshipOperator(BaseOperator): + """ + Migrate dag run and task run history by using this operator as a task in a DAG + """ + def __init__(self, hook: StarshipDagRunMigrationHook = None, *args, **kwargs): super().__init__(*args, **kwargs) self.hook = hook From c28cec333528d1350a7d39cf5b9aced843645ef2 Mon Sep 17 00:00:00 2001 From: anand Date: Mon, 22 Apr 2024 16:19:15 -0500 Subject: [PATCH 09/28] union --- astronomer_starship/compat/starship_hook.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py index d7e30f5..248df88 100644 --- a/astronomer_starship/compat/starship_hook.py +++ b/astronomer_starship/compat/starship_hook.py @@ -1,7 +1,7 @@ from logging import getLogger import requests from requests.adapters import HTTPAdapter -from typing import Literal +from typing import Literal, Union from textwrap import dedent from urllib3.util.retry import Retry @@ -161,8 +161,8 @@ def __init__( self, source_webserver_url: str, target_webserver_url: str, - source_auth: tuple = None, - target_auth: tuple = None, + source_auth: Union[tuple, list] = None, + target_auth: Union[tuple, list] = None, source_headers: dict = None, target_headers: dict = None, logger_name: str | None = None, From 7d93914cabef7506bfaff98bef13faf30b7b9b3a Mon Sep 17 00:00:00 2001 From: anand Date: Thu, 25 Apr 2024 14:49:54 -0500 Subject: [PATCH 10/28] add unpause option --- astronomer_starship/compat/starship_hook.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/compat/starship_hook.py index 248df88..1b515b3 100644 --- a/astronomer_starship/compat/starship_hook.py +++ b/astronomer_starship/compat/starship_hook.py @@ -165,6 +165,7 @@ def __init__( target_auth: Union[tuple, list] = None, source_headers: dict = None, target_headers: dict = None, + 
unpause_dags_in_target=False, logger_name: str | None = None, ): super().__init__(logger_name) @@ -175,6 +176,7 @@ def __init__( self.target_api_hook = StarshipAPIHook( webserver_url=target_webserver_url, auth=target_auth, headers=target_headers ) + self.unpause_dags_in_target = unpause_dags_in_target def load_dagruns_to_target( self, @@ -200,6 +202,9 @@ def load_dagruns_to_target( self.get_and_set_dagruns(dag_id) self.get_and_set_task_instances(dag_id) + if self.unpause_dags_in_target: + self.target_api_hook.set_dag_state(dag_id=dag_id, action="unpause") + def get_and_set_dagruns(self, dag_id: str) -> None: dag_runs = self.source_api_hook.get_dag_runs( dag_id=dag_id, From c548b201719b516da62ce12191953fd8f79f3a8a Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Wed, 22 May 2024 16:47:46 -0400 Subject: [PATCH 11/28] env var button for software, and query extra info required in Setup page --- astronomer_starship/src/State.jsx | 17 ++ astronomer_starship/src/pages/EnvVarsPage.jsx | 170 +++++++++++++----- astronomer_starship/src/pages/SetupPage.jsx | 67 ++++++- astronomer_starship/src/util.js | 30 +++- 4 files changed, 232 insertions(+), 52 deletions(-) diff --git a/astronomer_starship/src/State.jsx b/astronomer_starship/src/State.jsx index 7e5f0de..0e6da16 100644 --- a/astronomer_starship/src/State.jsx +++ b/astronomer_starship/src/State.jsx @@ -31,6 +31,12 @@ export const initialState = { isProductSelected: false, isTokenTouched: false, token: null, + deploymentId: null, + + // Software Specific: + releaseName: null, + workspaceId: null, + // ### VARIABLES PAGE #### variablesLocalData: [], variablesRemoteData: [], @@ -51,6 +57,7 @@ export const initialState = { envRemoteData: [], envLoading: false, envError: null, + organizationId: null, // ### DAGS PAGE #### dagsData: {}, dagsLoading: false, @@ -108,6 +115,14 @@ export const reducer = (state, action) => { case 'set-is-airflow': { return { ...state, isAirflow: action.isAirflow }; } + case 'set-software-info': { + return { + ...state, + releaseName: action.releaseName, + workspaceId: action.workspaceId, + deploymentId: action.deploymentId, + }; + } // ### VARIABLES PAGE #### case 'set-variables-loading': { @@ -208,6 +223,8 @@ export const reducer = (state, action) => { ...state, envLocalData: action.envLocalData, envRemoteData: action.envRemoteData, + organizationId: action.envRemoteData['ASTRO_ORGANIZATION_ID'] || state.organizationId, + deploymentId: action.envRemoteData['ASTRO_DEPLOYMENT_ID'] || state.deploymentId, envLoading: false, }; } diff --git a/astronomer_starship/src/pages/EnvVarsPage.jsx b/astronomer_starship/src/pages/EnvVarsPage.jsx index a152a23..d0ee444 100644 --- a/astronomer_starship/src/pages/EnvVarsPage.jsx +++ b/astronomer_starship/src/pages/EnvVarsPage.jsx @@ -1,8 +1,7 @@ -/* eslint-disable no-nested-ternary */ import React, { useEffect, useState } from 'react'; import { createColumnHelper } from '@tanstack/react-table'; import { - Text, Button, useToast, Tooltip, HStack, Spacer, + Text, Button, useToast, HStack, Spacer, } from '@chakra-ui/react'; import PropTypes from 'prop-types'; import axios from 'axios'; @@ -11,72 +10,149 @@ import { FaCheck } from 'react-icons/fa'; import { GoUpload } from 'react-icons/go'; import { RepeatIcon } from '@chakra-ui/icons'; import StarshipPage from '../component/StarshipPage'; -import MigrateButton from '../component/MigrateButton'; import { - fetchData, localRoute, proxyHeaders, proxyUrl, remoteRoute, + fetchData, 
getAstroEnvVarRoute, getHoustonRoute, localRoute, proxyHeaders, proxyUrl, remoteRoute, } from '../util'; import constants from '../constants'; -// noinspection JSUnusedLocalSymbols -export function MigrateEnvButton({ - // eslint-disable-next-line no-unused-vars,react/prop-types - isAstro, route, headers, existsInRemote, sendData, +const getDeploymentsQuery = `query deploymentVariables($deploymentUuid: Uuid!, $releaseName: String!) { + deploymentVariables( + deploymentUuid: $deploymentUuid + releaseName: $releaseName + ) { + key + value + isSecret + } +}`; + +const updateDeploymentVariablesMutation = ` +mutation UpdateDeploymentVariables( + $deploymentUuid:Uuid!, + $releaseName:String!, + $environmentVariables: [InputEnvironmentVariable!]! +) { + updateDeploymentVariables( + deploymentUuid: $deploymentUuid, + releaseName: $releaseName, + environmentVariables: $environmentVariables + ) { + key + value + isSecret + } +}`; + + +function EnvVarMigrateButton({ + route, headers, existsInRemote, sendData, isAstro, deploymentId, releaseName }) { const [loading, setLoading] = useState(false); const [error, setError] = useState(null); const toast = useToast(); const [exists, setExists] = useState(existsInRemote); - function handleClick() { + const errFn = (err) => { + setExists(false); + setLoading(false); + toast({ + title: err.response?.data?.error || err.response?.data || err.message, + status: 'error', + isClosable: true, + }); + setError(err); + } + + function handleSoftwareClick() { + // POST https://houston.BASEDOMAIN/v1 setLoading(true); - axios.post(route, sendData, { headers }) + axios.post( + route, + { + operationName: "deploymentVariables", + query: getDeploymentsQuery, + variables: { + "deploymentUuid": deploymentId, + "releaseName": releaseName, + } + }, + { headers } + ) .then((res) => { - setLoading(false); - setExists(res.status === 200); + let variables = res.data?.data?.deploymentVariables || []; + // TODO - DEDUPE? Check if key already exists and reject + variables.push(sendData); + axios.post( + route, + { + operationName: "UpdateDeploymentVariables", + query: updateDeploymentVariablesMutation, + variables: { + "deploymentUuid": deploymentId, + "releaseName": releaseName, + "environmentVariables": variables, + } + }, + { headers } + ) + .then((res) => { + setLoading(false); + setExists(res.status === 200); + }) + .catch(errFn); }) - .catch((err) => { - setExists(false); - setLoading(false); - toast({ - title: err.response?.data?.error || err.response?.data || err.message, - status: 'error', - isClosable: true, - }); - setError(err); - }); + .catch(errFn); } + function handleAstroClick() { + setLoading(true); + // GET/POST https://api.astronomer.io/platform/v1beta1/organizations/:organizationId/deployments/:deploymentId + axios.get(route, { headers }) + .then((res) => { + // TODO - DEDUPE? Check if key already exists and reject + res.data?.environmentVariables.push(sendData); + axios.post(route, res.data, { headers }) + .then((res) => { + setLoading(false); + setExists(res.status === 200); + }) + .catch(errFn); + }) + .catch(errFn); + } return ( - - - + onClick={() => isAstro ? handleAstroClick() : handleSoftwareClick()} + > + {exists ? 'Ok' : loading ? '' : error ? 'Error!' 
: 'Migrate'} + ); } -MigrateEnvButton.propTypes = { +EnvVarMigrateButton.propTypes = { route: PropTypes.string.isRequired, headers: PropTypes.objectOf(PropTypes.string), existsInRemote: PropTypes.bool, // eslint-disable-next-line react/forbid-prop-types sendData: PropTypes.object.isRequired, + deploymentId: PropTypes.string, + releaseName: PropTypes.string, }; -MigrateEnvButton.defaultProps = { +EnvVarMigrateButton.defaultProps = { headers: {}, existsInRemote: false, + deploymentId: null, + releaseName: null, }; const columnHelper = createColumnHelper(); @@ -104,9 +180,12 @@ export default function EnvVarsPage({ state, dispatch }) { ); useEffect(() => fetchPageData(), []); useEffect( - () => setData(setEnvData(state.envLocalData, state.envRemoteData)), + () => { + setData(setEnvData(state.envLocalData, state.envRemoteData)) + }, [state], ); + // // noinspection JSCheckFunctionSignatures const columns = [ @@ -117,17 +196,22 @@ export default function EnvVarsPage({ state, dispatch }) { header: 'Migrate', // eslint-disable-next-line react/no-unstable-nested-components cell: (info) => ( - + deploymentId={state.deploymentId} + releaseName={state.releaseName} + /> ), }), ]; diff --git a/astronomer_starship/src/pages/SetupPage.jsx b/astronomer_starship/src/pages/SetupPage.jsx index dbeed73..945b279 100644 --- a/astronomer_starship/src/pages/SetupPage.jsx +++ b/astronomer_starship/src/pages/SetupPage.jsx @@ -19,15 +19,70 @@ import { FormHelperText, InputRightElement, useColorMode, Spacer, } from '@chakra-ui/react'; -import React from 'react'; +import React, { useEffect } from 'react'; import PropTypes from 'prop-types'; import { CheckIcon, ExternalLinkIcon, RepeatIcon, } from '@chakra-ui/icons'; -import { getTargetUrlFromParts, tokenUrlFromAirflowUrl } from '../util'; +import { getHoustonRoute, getTargetUrlFromParts, proxyHeaders, proxyUrl, tokenUrlFromAirflowUrl } from '../util'; import ValidatedUrlCheckbox from '../component/ValidatedUrlCheckbox'; +import axios from "axios"; + +const workspaceDeploymentsQuery = { + operationName: "workspaces", + query: ` +query workspaces { + workspaces { + id + deployments { + id + releaseName + } + } +}`, + variables: {} +}; export default function SetupPage({ state, dispatch }) { + // Get the workspace ID & etc. if it's software and setup is completed + useEffect( + () => { + if ( + state.isSetupComplete && // setup is completed + !state.isAstro && // it's Software + !(state.releaseName && state.workspaceId && state.deploymentId) // one or more of three isn't set + ){ + axios.post( + proxyUrl(getHoustonRoute(state.urlOrgPart)), + workspaceDeploymentsQuery, + { + headers: proxyHeaders(state.token) + } + ) + .then((res) => { + let found = false; + for (let workspace of res.data?.data?.workspaces) { + if (found) break; + for (let deployment of workspace.deployments) { + if (found) break; + if (deployment.releaseName === state.urlDeploymentPart) { + dispatch({ + type: 'set-software-info', + deploymentId: deployment.id, + releaseName: deployment.releaseName, + workspaceId: workspace.id + }); + } + } + } + res.data?.data?.workspaces + }) + .catch((err) => {}); + } + }, + [state], + ); + return ( @@ -125,12 +180,12 @@ export default function SetupPage({ state, dispatch }) { /home ) : ( - // Software URL Template: https://astro.basedomain.com/space-name-1234/airflow/ + // Software URL Template: https://deployments.basedomain.com/space-name-1234/airflow/home - https:// + https://deployments. 
/ Date: Wed, 22 May 2024 16:48:21 -0400 Subject: [PATCH 12/28] better toast for validation checkbox, only allow tabs if all data is good --- astronomer_starship/src/State.jsx | 17 ++++++++--- .../src/component/ValidatedUrlCheckbox.jsx | 28 +++++++++++++++---- .../src/pages/DAGHistoryPage.jsx | 2 +- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/astronomer_starship/src/State.jsx b/astronomer_starship/src/State.jsx index 0e6da16..9e12be0 100644 --- a/astronomer_starship/src/State.jsx +++ b/astronomer_starship/src/State.jsx @@ -86,7 +86,7 @@ export const reducer = (state, action) => { urlDeploymentPart: action.urlDeploymentPart, urlOrgPart: action.urlOrgPart, isValidUrl: action.urlOrgPart && action.urlDeploymentPart, - isSetupComplete: action.urlOrgPart && action.urlDeploymentPart && state.token, + isSetupComplete: state.isStarship && state.isAirflow && state.token && action.urlOrgPart && action.urlDeploymentPart, }; } case 'set-token': { @@ -94,7 +94,7 @@ export const reducer = (state, action) => { ...state, isTokenTouched: true, token: action.token, - isSetupComplete: action.token && state.isValidUrl, + isSetupComplete: state.isStarship && state.isAirflow && action.token && state.isValidUrl, }; } case 'toggle-is-astro': { @@ -103,6 +103,7 @@ export const reducer = (state, action) => { isAstro: !state.isAstro, isProductSelected: true, targetUrl: getTargetUrlFromParts(state.urlOrgPart, state.urlDeploymentPart, !state.isAstro), + token: null, isSetupComplete: false, }; } @@ -110,10 +111,18 @@ export const reducer = (state, action) => { return { ...state, isProductSelected: true }; } case 'set-is-starship': { - return { ...state, isStarship: action.isStarship }; + return { + ...state, + isStarship: action.isStarship, + isSetupComplete: action.isStarship && state.isAirflow && state.token && state.isValidUrl, + }; } case 'set-is-airflow': { - return { ...state, isAirflow: action.isAirflow }; + return { + ...state, + isAirflow: action.isAirflow, + isSetupComplete: action.isAirflow && state.isStarship && state.token && state.isValidUrl, + }; } case 'set-software-info': { return { diff --git a/astronomer_starship/src/component/ValidatedUrlCheckbox.jsx b/astronomer_starship/src/component/ValidatedUrlCheckbox.jsx index 3fde2e4..3e002a0 100644 --- a/astronomer_starship/src/component/ValidatedUrlCheckbox.jsx +++ b/astronomer_starship/src/component/ValidatedUrlCheckbox.jsx @@ -12,13 +12,29 @@ export default function ValidatedUrlCheckbox({ useEffect(() => { // noinspection JSCheckFunctionSignatures axios.get(proxyUrl(url), { headers: proxyHeaders(token) }) - .then((res) => setValid(res.status === 200)) + .then((res) => { + // Valid if it's a 200, has data, and is JSON + const isValid = ( + res.status === 200 && + res.data && + (res.headers['content-type'] === 'application/json' || res.data === "OK") + ); + setValid(isValid); + }) .catch((err) => { - toast({ - title: err.response?.data?.error || err.response?.data || err.message, - status: 'error', - isClosable: true, - }); + if (err.response.status === 404) { + toast({ + title: 'Not found', + status: 'error', + isClosable: true, + }); + } else { + toast({ + title: err.response?.data?.error || err.message || err.response?.data, + status: 'error', + isClosable: true, + }); + } setValid(false); }) .finally(() => setLoading.off()); diff --git a/astronomer_starship/src/pages/DAGHistoryPage.jsx b/astronomer_starship/src/pages/DAGHistoryPage.jsx index 1f73a8e..276f365 100644 --- a/astronomer_starship/src/pages/DAGHistoryPage.jsx +++ 
b/astronomer_starship/src/pages/DAGHistoryPage.jsx @@ -160,7 +160,7 @@ DAGHistoryMigrateButton.defaultProps = { isDisabled: false, }; -export function setDagData(localData, remoteData, key = 'dag_id') { +function setDagData(localData, remoteData, key = 'dag_id') { const output = {}; localData.forEach((i) => { const keyValue = i[key]; From 192a95a2da1fce7dcdbdb7d99a73dddafa9b00a7 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Thu, 23 May 2024 12:57:17 -0400 Subject: [PATCH 13/28] add additional error handling while fetching data, remove infinite loading spinner --- astronomer_starship/src/State.jsx | 5 +++++ astronomer_starship/src/util.js | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/astronomer_starship/src/State.jsx b/astronomer_starship/src/State.jsx index 9e12be0..127ffd8 100644 --- a/astronomer_starship/src/State.jsx +++ b/astronomer_starship/src/State.jsx @@ -155,6 +155,7 @@ export const reducer = (state, action) => { return action.error.response.status === 401 ? { ...state, variablesError: action.error, + variablesLoading: false, isSetupComplete: false, isTokenTouched: false, token: null, @@ -183,6 +184,7 @@ export const reducer = (state, action) => { return action.error.response.status === 401 ? { ...state, connectionsError: action.error, + connectionsLoading: false, isSetupComplete: false, isTokenTouched: false, token: null, @@ -211,6 +213,7 @@ export const reducer = (state, action) => { return action.error.response.status === 401 ? { ...state, poolsError: action.error, + poolsLoading: false, isSetupComplete: false, isTokenTouched: false, token: null, @@ -241,6 +244,7 @@ export const reducer = (state, action) => { return action.error.response.status === 401 ? { ...state, envError: action.error, + envLoading: false, isSetupComplete: false, isTokenTouched: false, token: null, @@ -267,6 +271,7 @@ export const reducer = (state, action) => { return action.error.response.status === 401 ? { ...state, dagsError: action.error, + dagsLoading: false, isSetupComplete: false, isTokenTouched: false, token: null, diff --git a/astronomer_starship/src/util.js b/astronomer_starship/src/util.js index d56a946..6c2dbad 100644 --- a/astronomer_starship/src/util.js +++ b/astronomer_starship/src/util.js @@ -101,7 +101,16 @@ export function fetchData( .then((res) => { axios .get(proxyUrl(remoteRouteUrl), { headers: proxyHeaders(token) }) - .then((rRes) => dataDispatch(res, rRes)) // , dispatch)) + .then((rRes) => { + if ( + res.status === 200 && res.headers['content-type'] === 'application/json' && + rRes.status === 200 && res.headers['content-type'] === 'application/json' + ){ + dataDispatch(res, rRes) + } else { + errorDispatch('Invalid response'); + } + }) // , dispatch)) .catch((err) => errorDispatch(err)); // , dispatch)); }) .catch((err) => errorDispatch(err)); // , dispatch)); From 1e5197a235d75643d899effd6134099ce8dd5934 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Thu, 23 May 2024 14:09:36 -0400 Subject: [PATCH 14/28] hide env vars. 
Max width on table columns to fix overflow --- .../src/component/DataTable.jsx | 102 +++++++++--------- astronomer_starship/src/pages/EnvVarsPage.jsx | 6 +- 2 files changed, 57 insertions(+), 51 deletions(-) diff --git a/astronomer_starship/src/component/DataTable.jsx b/astronomer_starship/src/component/DataTable.jsx index 4ea72af..76051ac 100644 --- a/astronomer_starship/src/component/DataTable.jsx +++ b/astronomer_starship/src/component/DataTable.jsx @@ -1,6 +1,6 @@ import * as React from 'react'; import { - Table, Thead, Tbody, Tr, Th, Td, chakra, + Table, Thead, Tbody, Tr, Th, Td, chakra, TableContainer, } from '@chakra-ui/react'; import { TriangleDownIcon, TriangleUpIcon } from '@chakra-ui/icons'; import { @@ -21,58 +21,60 @@ export default function DataTable({ getCoreRowModel: getCoreRowModel(), onSortingChange: setSorting, getSortedRowModel: getSortedRowModel(), - state: { sorting }, + state: {sorting}, }); return ( - - - {table.getHeaderGroups().map((headerGroup) => ( - - {headerGroup.headers.map((header) => { - // see https://tanstack.com/table/v8/docs/api/core/column-def#meta to type this correctly - const { meta } = header.column.columnDef; - return ( - + ); + })} + + ))} + + + {table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => { + // see https://tanstack.com/table/v8/docs/api/core/column-def#meta to type this correctly + const {meta} = cell.column.columnDef; + return ( + + ); + })} + + ))} + +
- {flexRender( - header.column.columnDef.header, - header.getContext(), - )} + + + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => { + // see https://tanstack.com/table/v8/docs/api/core/column-def#meta to type this correctly + const {meta} = header.column.columnDef; + return ( + - ); - })} - - ))} - - - {table.getRowModel().rows.map((row) => ( - - {row.getVisibleCells().map((cell) => { - // see https://tanstack.com/table/v8/docs/api/core/column-def#meta to type this correctly - const { meta } = cell.column.columnDef; - return ( - - ); - })} - - ))} - -
+ {flexRender( + header.column.columnDef.header, + header.getContext(), + )} - - {header.column.getIsSorted() ? ( - header.column.getIsSorted() === 'desc' ? ( - - ) : ( - - ) - ) : null} - -
- {flexRender(cell.column.columnDef.cell, cell.getContext())} -
+ + {header.column.getIsSorted() ? ( + header.column.getIsSorted() === 'desc' ? ( + + ) : ( + + ) + ) : null} + +
+ {flexRender(cell.column.columnDef.cell, cell.getContext())} +
+ ); } diff --git a/astronomer_starship/src/pages/EnvVarsPage.jsx b/astronomer_starship/src/pages/EnvVarsPage.jsx index d0ee444..c943106 100644 --- a/astronomer_starship/src/pages/EnvVarsPage.jsx +++ b/astronomer_starship/src/pages/EnvVarsPage.jsx @@ -14,6 +14,7 @@ import { fetchData, getAstroEnvVarRoute, getHoustonRoute, localRoute, proxyHeaders, proxyUrl, remoteRoute, } from '../util'; import constants from '../constants'; +import HiddenValue from "../component/HiddenValue.jsx"; const getDeploymentsQuery = `query deploymentVariables($deploymentUuid: Uuid!, $releaseName: String!) { deploymentVariables( @@ -156,6 +157,9 @@ EnvVarMigrateButton.defaultProps = { }; const columnHelper = createColumnHelper(); +const valueColumn = columnHelper.accessor('value', { + id: 'value', cell: (props) => , +}); function setEnvData(localData, remoteData) { return Object.entries(localData).map( @@ -190,7 +194,7 @@ export default function EnvVarsPage({ state, dispatch }) { // noinspection JSCheckFunctionSignatures const columns = [ columnHelper.accessor('key'), - columnHelper.accessor('value'), + valueColumn, columnHelper.display({ id: 'migrate', header: 'Migrate', From 2c007541e2999827648ab45e8298afa9439f97e1 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Thu, 23 May 2024 16:47:43 -0400 Subject: [PATCH 15/28] add Telescope page --- astronomer_starship/src/App.jsx | 4 +- astronomer_starship/src/State.jsx | 16 ++ astronomer_starship/src/constants.js | 1 + astronomer_starship/src/pages/SetupPage.jsx | 10 ++ .../src/pages/TelescopePage.jsx | 151 ++++++++++++++++++ astronomer_starship/starship_api.py | 101 +++++++++++- 6 files changed, 278 insertions(+), 5 deletions(-) create mode 100644 astronomer_starship/src/pages/TelescopePage.jsx diff --git a/astronomer_starship/src/App.jsx b/astronomer_starship/src/App.jsx index 6ae7a51..580e644 100644 --- a/astronomer_starship/src/App.jsx +++ b/astronomer_starship/src/App.jsx @@ -19,6 +19,7 @@ import { } from './State'; import './index.css'; import AppLoading from './component/AppLoading'; +import TelescopePage from './pages/TelescopePage'; export default function App() { const [state, dispatch] = useReducer(reducer, initialState, getInitialState); @@ -118,7 +119,8 @@ export default function App() { } /> } /> } /> - , + } /> + , ), ); return ( diff --git a/astronomer_starship/src/State.jsx b/astronomer_starship/src/State.jsx index 127ffd8..5a5014e 100644 --- a/astronomer_starship/src/State.jsx +++ b/astronomer_starship/src/State.jsx @@ -32,6 +32,8 @@ export const initialState = { isTokenTouched: false, token: null, deploymentId: null, + telescopeOrganizationId: '', + telescopePresignedUrl: '', // Software Specific: releaseName: null, @@ -133,6 +135,20 @@ export const reducer = (state, action) => { }; } + // ### Telescope ### + case 'set-telescope-org': { + return { + ...state, + telescopeOrganizationId: action.telescopeOrganizationId, + }; + } + case 'set-telescope-presigned-url': { + return { + ...state, + telescopePresignedUrl: action.telescopePresignedUrl, + }; + } + // ### VARIABLES PAGE #### case 'set-variables-loading': { return { diff --git a/astronomer_starship/src/constants.js b/astronomer_starship/src/constants.js index 68746f1..873cba8 100644 --- a/astronomer_starship/src/constants.js +++ b/astronomer_starship/src/constants.js @@ -1,4 +1,5 @@ const constants = { + TELESCOPE_ROUTE: '/api/starship/telescope', ENV_VAR_ROUTE: '/api/starship/env_vars', POOL_ROUTE: '/api/starship/pools', CONNECTIONS_ROUTE: 
'/api/starship/connections', diff --git a/astronomer_starship/src/pages/SetupPage.jsx b/astronomer_starship/src/pages/SetupPage.jsx index 945b279..05834f4 100644 --- a/astronomer_starship/src/pages/SetupPage.jsx +++ b/astronomer_starship/src/pages/SetupPage.jsx @@ -24,6 +24,8 @@ import PropTypes from 'prop-types'; import { CheckIcon, ExternalLinkIcon, RepeatIcon, } from '@chakra-ui/icons'; +import { IoTelescopeOutline } from 'react-icons/io5'; +import { NavLink } from 'react-router-dom'; import { getHoustonRoute, getTargetUrlFromParts, proxyHeaders, proxyUrl, tokenUrlFromAirflowUrl } from '../util'; import ValidatedUrlCheckbox from '../component/ValidatedUrlCheckbox'; import axios from "axios"; @@ -88,6 +90,14 @@ export default function SetupPage({ state, dispatch }) { Starship is a utility to migrate Airflow metadata between instances + + + + + +
+ + + ); +} diff --git a/astronomer_starship/starship_api.py b/astronomer_starship/starship_api.py index 4de6dcd..dffb7c1 100644 --- a/astronomer_starship/starship_api.py +++ b/astronomer_starship/starship_api.py @@ -2,20 +2,66 @@ from functools import partial import flask +import requests from airflow.plugins_manager import AirflowPlugin from airflow.www.app import csrf from flask import Blueprint, request, jsonify from flask_appbuilder import expose, BaseView +import os +from typing import Any, Dict, List, Union +import base64 +import logging +from json import JSONDecodeError + +from astronomer_starship.compat.starship_compatability import ( + StarshipCompatabilityLayer, + get_kwargs_fn, +) from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Callable -from astronomer_starship.compat.starship_compatability import ( - StarshipCompatabilityLayer, - get_kwargs_fn, -) + +def get_json_or_clean_str(o: str) -> Union[List[Any], Dict[Any, Any], Any]: + """For Aeroscope - Either load JSON (if we can) or strip and split the string, while logging the error""" + try: + return json.loads(o) + except (JSONDecodeError, TypeError) as e: + logging.debug(e) + logging.debug(o) + return o.strip() + + +def clean_airflow_report_output(log_string: str) -> Union[dict, str]: + r"""For Aeroscope - Look for the magic string from the Airflow report and then decode the base64 and convert to json + Or return output as a list, trimmed and split on newlines + >>> clean_airflow_report_output('INFO 123 - xyz - abc\n\n\nERROR - 1234\n%%%%%%%\naGVsbG8gd29ybGQ=') + 'hello world' + >>> clean_airflow_report_output( + ... 'INFO 123 - xyz - abc\n\n\nERROR - 1234\n%%%%%%%\neyJvdXRwdXQiOiAiaGVsbG8gd29ybGQifQ==' + ... ) + {'output': 'hello world'} + """ + + log_lines = log_string.split("\n") + enumerated_log_lines = list(enumerate(log_lines)) + found_i = -1 + for i, line in enumerated_log_lines: + if "%%%%%%%" in line: + found_i = i + 1 + break + if found_i != -1: + output = base64.decodebytes( + "\n".join(log_lines[found_i:]).encode("utf-8") + ).decode("utf-8") + try: + return json.loads(output) + except JSONDecodeError: + return get_json_or_clean_str(output) + else: + return get_json_or_clean_str(log_string) def starship_route( @@ -126,6 +172,53 @@ def ok(): return starship_route(get=ok) + @expose("/telescope", methods=["GET"]) + @csrf.exempt + def telescope(self): + from socket import gethostname + import io + import runpy + from urllib.request import urlretrieve + from contextlib import redirect_stdout, redirect_stderr + from urllib.error import HTTPError + from datetime import datetime, timezone + + aero_version = os.getenv("TELESCOPE_REPORT_RELEASE_VERSION", "latest") + a = "airflow_report.pyz" + aero_url = ( + "https://github.com/astronomer/telescope/releases/latest/download/airflow_report.pyz" + if aero_version == "latest" + else f"https://github.com/astronomer/telescope/releases/download/{aero_version}/airflow_report.pyz" + ) + try: + urlretrieve(aero_url, a) + except HTTPError as e: + raise RuntimeError( + f"Error finding specified version:{aero_version} -- Reason:{e.reason}" + ) + + s = io.StringIO() + with redirect_stdout(s), redirect_stderr(s): + runpy.run_path(a) + report = { + "telescope_version": "aeroscope-latest", + "report_date": datetime.now(timezone.utc).isoformat()[:10], + "organization_name": request.args["organization"], + "local": { + gethostname(): { + "airflow_report": clean_airflow_report_output(s.getvalue()) + } + }, + } + presigned_url = request.args.get("presigned_url", False) + if 
presigned_url: + try: + upload = requests.put(presigned_url, data=json.dumps(report)) + return upload.content, upload.status_code + except requests.exceptions.ConnectionError as e: + return str(e), 400 + return report + @expose("/airflow_version", methods=["GET"]) @csrf.exempt def airflow_version(self) -> str: From f5ac3e9ad8394ceb0b4e19f73670b5d218dcffe1 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 24 May 2024 13:26:48 -0400 Subject: [PATCH 16/28] re-export setDagData so it can be unit tested --- .../providers/starship/hooks/starship.py | 218 ++++++++++++++++++ .../providers/starship/operators/starship.py | 21 ++ .../src/pages/DAGHistoryPage.jsx | 2 +- 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 astronomer_starship/providers/starship/hooks/starship.py create mode 100644 astronomer_starship/providers/starship/operators/starship.py diff --git a/astronomer_starship/providers/starship/hooks/starship.py b/astronomer_starship/providers/starship/hooks/starship.py new file mode 100644 index 0000000..1b515b3 --- /dev/null +++ b/astronomer_starship/providers/starship/hooks/starship.py @@ -0,0 +1,218 @@ +from logging import getLogger +import requests +from requests.adapters import HTTPAdapter +from typing import Literal, Union +from textwrap import dedent +from urllib3.util.retry import Retry + +from airflow.utils.state import DagRunState +from airflow.hooks.base import BaseHook + + +logger = getLogger(__name__) + + +def urljoin(base: str, endpoint: str) -> str: + return "/".join((base.rstrip("/"), endpoint.lstrip("/"))) + + +def session_with_retry(retries=3, backoff_factor=2): + sess = requests.Session() + retry = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=[500, 502, 503, 504], + ) + sess.mount("http://", HTTPAdapter(max_retries=retry)) + sess.mount("https://", HTTPAdapter(max_retries=retry)) + return sess + + +def _request( + type: Literal["get", "post", "put", "patch"], + endpoint, + auth=None, + json=None, + params=None, + headers=None, + retries=3, + backoff_factor=2, +): + s = session_with_retry(retries=retries, backoff_factor=backoff_factor) + request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch} + method = request_mapping.get(type) + if auth: + auth = tuple(auth) + resp = method(endpoint, params=params, json=json, auth=auth, headers=headers) + if resp.status_code != 200: + logger.info( + f"request failed with status {resp.status_code} for {type} on endpoint {endpoint} with text {resp.text}" + ) + return resp + + +class StarshipAPIHook(BaseHook): + DAG_RUNS = "api/starship/dag_runs" + TASK_INSTANCES = "api/starship/task_instances" + DAGS = "api/starship/dags" + + def __init__( + self, + webserver_url, + auth=None, + headers=None, + logger_name: str | None = None, + ): + super().__init__(logger_name) + self.webserver_url = webserver_url + self.auth = auth + self.headers = headers + + # todo: maybe create utility classes? 
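+
+    # Illustrative usage sketch (comments only, not part of the change set).
+    # The URLs, credentials, and dag_id below are placeholders; the calls match
+    # the methods defined on this hook:
+    #
+    #   source = StarshipAPIHook(webserver_url="https://source.example.com", auth=("user", "pass"))
+    #   target = StarshipAPIHook(webserver_url="https://target.example.com", auth=("user", "pass"))
+    #   runs = source.get_dag_runs(dag_id="example_dag", limit=5)
+    #   target.set_dag_runs(dag_runs=runs["dag_runs"])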
+ def get_dags(self) -> dict: + dags = urljoin(self.webserver_url, StarshipAPIHook.DAGS) + resp = _request("get", endpoint=dags, auth=self.auth, headers=self.headers) + return resp.json() + + def get_dag_runs(self, dag_id, limit=5) -> dict: + dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) + resp = _request( + type="get", + endpoint=dagrun_endpoint, + auth=self.auth, + headers=self.headers, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp.json() + + def set_dag_runs( + self, + dag_runs: list[dict], + ) -> dict: + dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) + resp = _request( + type="post", + endpoint=dagrun_endpoint, + auth=self.auth, + headers=self.headers, + json={"dag_runs": dag_runs}, + ) + return resp.json() + + def get_latest_dagrun_state(self, dag_id) -> str: + latest = self.get_dag_runs( + dag_id=dag_id, + limit=1, + ) + logger.info(f"fetching latest dagrun for {dag_id}") + logger.info(f"{latest}") + + return latest["dag_runs"][0]["state"] + + # another reason for class to couple dagrun and task instance retrieval limits + def get_task_instances( + self, + dag_id: str, + limit: int = 5, + ) -> dict: + task_instances = urljoin(self.webserver_url, StarshipAPIHook.TASK_INSTANCES) + resp = _request( + type="get", + endpoint=task_instances, + auth=self.auth, + headers=self.headers, + params={"dag_id": dag_id, "limit": limit}, + ) + return resp.json() + + def set_task_instances(self, task_instances: list[dict]) -> dict: + task_instance_endpoint = urljoin( + self.webserver_url, StarshipAPIHook.TASK_INSTANCES + ) + resp = _request( + type="post", + endpoint=task_instance_endpoint, + auth=self.auth, + headers=self.headers, + json={"task_instances": task_instances}, + ) + return resp.json() + + def set_dag_state( + self, + dag_id: str, + action=Literal["pause", "unpause"], + ) -> requests.Response: + action_dict = {"pause": True, "unpause": False} + is_paused = action_dict[action] + payload = {"dag_id": dag_id, "is_paused": is_paused} + dag_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAGS) + return _request( + type="patch", + endpoint=dag_endpoint, + auth=self.auth, + headers=self.headers, + json=payload, + ) + + +class StarshipDagRunMigrationHook(BaseHook): + def __init__( + self, + source_webserver_url: str, + target_webserver_url: str, + source_auth: Union[tuple, list] = None, + target_auth: Union[tuple, list] = None, + source_headers: dict = None, + target_headers: dict = None, + unpause_dags_in_target=False, + logger_name: str | None = None, + ): + super().__init__(logger_name) + + self.source_api_hook = StarshipAPIHook( + webserver_url=source_webserver_url, auth=source_auth, headers=source_headers + ) + self.target_api_hook = StarshipAPIHook( + webserver_url=target_webserver_url, auth=target_auth, headers=target_headers + ) + self.unpause_dags_in_target = unpause_dags_in_target + + def load_dagruns_to_target( + self, + dag_ids: list[str] = None, + ) -> None: + if not dag_ids: + dag_ids = [dag["dag_id"] for dag in self.source_api_hook.get_dags()] + + for dag_id in dag_ids: + state = self.source_api_hook.get_latest_dagrun_state(dag_id=dag_id) + if state not in (DagRunState.FAILED, DagRunState.SUCCESS): + logger.info( + dedent( + f"""Latest dagrun for {dag_id} is not not in state + {(DagRunState.FAILED, DagRunState.SUCCESS)}. 
Skipping migration.""" + ) + ) + else: + self.source_api_hook.set_dag_state( + dag_id=dag_id, + action="pause", + ) + self.get_and_set_dagruns(dag_id) + self.get_and_set_task_instances(dag_id) + + if self.unpause_dags_in_target: + self.target_api_hook.set_dag_state(dag_id=dag_id, action="unpause") + + def get_and_set_dagruns(self, dag_id: str) -> None: + dag_runs = self.source_api_hook.get_dag_runs( + dag_id=dag_id, + ) + self.target_api_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"]) + + def get_and_set_task_instances(self, dag_id: str) -> None: + task_instances = self.source_api_hook.get_task_instances(dag_id=dag_id) + self.target_api_hook.set_task_instances( + task_instances=task_instances["task_instances"] + ) diff --git a/astronomer_starship/providers/starship/operators/starship.py b/astronomer_starship/providers/starship/operators/starship.py new file mode 100644 index 0000000..824090d --- /dev/null +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -0,0 +1,21 @@ +from airflow.models.baseoperator import BaseOperator + +from astronomer_starship.compat.starship_hook import StarshipDagRunMigrationHook + + +class StarshipOperator(BaseOperator): + """ + Migrate dag run and task run history by using this operator as a task in a DAG + """ + + def __init__(self, hook: StarshipDagRunMigrationHook = None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.hook = hook + + def execute(self, context): + conf = context["conf"] + + if not self.hook: + self.hook = StarshipDagRunMigrationHook(**conf) + + return self.hook.load_dagruns_to_target(dag_ids=conf.get("dag_ids")) diff --git a/astronomer_starship/src/pages/DAGHistoryPage.jsx b/astronomer_starship/src/pages/DAGHistoryPage.jsx index 276f365..1f73a8e 100644 --- a/astronomer_starship/src/pages/DAGHistoryPage.jsx +++ b/astronomer_starship/src/pages/DAGHistoryPage.jsx @@ -160,7 +160,7 @@ DAGHistoryMigrateButton.defaultProps = { isDisabled: false, }; -function setDagData(localData, remoteData, key = 'dag_id') { +export function setDagData(localData, remoteData, key = 'dag_id') { const output = {}; localData.forEach((i) => { const keyValue = i[key]; From c8b0f16926edb4d452a3b8c33246f6a4e2da5e29 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 24 May 2024 13:29:27 -0400 Subject: [PATCH 17/28] modify to match standard classpaths --- astronomer_starship/providers/__init__.py | 0 astronomer_starship/providers/starship/__init__.py | 0 astronomer_starship/providers/starship/hooks/__init__.py | 0 .../starship_hook.py => providers/starship/hooks/starship.py} | 0 astronomer_starship/providers/starship/operators/__init__.py | 0 .../starship/operators/starship.py} | 4 +++- 6 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 astronomer_starship/providers/__init__.py create mode 100644 astronomer_starship/providers/starship/__init__.py create mode 100644 astronomer_starship/providers/starship/hooks/__init__.py rename astronomer_starship/{compat/starship_hook.py => providers/starship/hooks/starship.py} (100%) create mode 100644 astronomer_starship/providers/starship/operators/__init__.py rename astronomer_starship/{compat/starship_operator.py => providers/starship/operators/starship.py} (84%) diff --git a/astronomer_starship/providers/__init__.py b/astronomer_starship/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/astronomer_starship/providers/starship/__init__.py b/astronomer_starship/providers/starship/__init__.py new file 
mode 100644 index 0000000..e69de29 diff --git a/astronomer_starship/providers/starship/hooks/__init__.py b/astronomer_starship/providers/starship/hooks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/astronomer_starship/compat/starship_hook.py b/astronomer_starship/providers/starship/hooks/starship.py similarity index 100% rename from astronomer_starship/compat/starship_hook.py rename to astronomer_starship/providers/starship/hooks/starship.py diff --git a/astronomer_starship/providers/starship/operators/__init__.py b/astronomer_starship/providers/starship/operators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/astronomer_starship/compat/starship_operator.py b/astronomer_starship/providers/starship/operators/starship.py similarity index 84% rename from astronomer_starship/compat/starship_operator.py rename to astronomer_starship/providers/starship/operators/starship.py index 824090d..f464f6e 100644 --- a/astronomer_starship/compat/starship_operator.py +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -1,6 +1,8 @@ from airflow.models.baseoperator import BaseOperator -from astronomer_starship.compat.starship_hook import StarshipDagRunMigrationHook +from astronomer_starship.providers.starship.hooks.starship import ( + StarshipDagRunMigrationHook, +) class StarshipOperator(BaseOperator): From a7fd133912d8331fa20f0cc12662ba163a24fa11 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 24 May 2024 16:56:07 -0400 Subject: [PATCH 18/28] big WIP DRY refactor. Used the starship_compat layer directly --- .../providers/starship/hooks/starship.py | 385 +++++++++--------- .../providers/starship/operators/starship.py | 226 +++++++++- pyproject.toml | 4 + 3 files changed, 404 insertions(+), 211 deletions(-) diff --git a/astronomer_starship/providers/starship/hooks/starship.py b/astronomer_starship/providers/starship/hooks/starship.py index 1b515b3..61d4c48 100644 --- a/astronomer_starship/providers/starship/hooks/starship.py +++ b/astronomer_starship/providers/starship/hooks/starship.py @@ -1,218 +1,205 @@ -from logging import getLogger -import requests -from requests.adapters import HTTPAdapter -from typing import Literal, Union -from textwrap import dedent -from urllib3.util.retry import Retry - -from airflow.utils.state import DagRunState +from abc import ABC, abstractmethod + +from typing import List + +from airflow.providers.http.hooks.http import HttpHook from airflow.hooks.base import BaseHook +from astronomer_starship.starship_api import starship_compat + +POOLS_ROUTE = "/api/starship/pools" +CONNECTIONS_ROUTE = "/api/starship/connections" +VARIABLES_ROUTE = "/api/starship/variables" +DAGS_ROUTE = "/api/starship/dags" +DAG_RUNS_ROUTE = "/api/starship/dag_runs" +TASK_INSTANCES_ROUTE = "/api/starship/task_instances" + + +class StarshipHook(ABC): + @abstractmethod + def get_variables(self): + pass + + @abstractmethod + def set_variable(self, **kwargs): + pass + + @abstractmethod + def get_pools(self): + pass + + @abstractmethod + def set_pool(self, **kwargs): + pass + + @abstractmethod + def get_connections(self): + pass + + @abstractmethod + def set_connection(self, **kwargs): + pass + + @abstractmethod + def get_dags(self): + pass + + @abstractmethod + def set_dag_is_paused(self, dag_id: str, is_paused: bool): + pass + + @abstractmethod + def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: + pass + + @abstractmethod + def set_dag_runs(self, dag_runs: 
list): + pass + + @abstractmethod + def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): + pass + + @abstractmethod + def set_task_instances(self, task_instances: list): + pass + + +class StarshipLocalHook(BaseHook, StarshipHook): + def get_variables(self): + return starship_compat.get_variables() + + def set_variable(self, **kwargs): + raise RuntimeError("Setting local data is not supported") + + def get_pools(self): + return starship_compat.get_pools() + + def set_pool(self, **kwargs): + raise RuntimeError("Setting local data is not supported") + + def set_connection(self, **kwargs): + raise RuntimeError("Setting local data is not supported") + + def set_dag_is_paused(self, dag_id: str, is_paused: bool): + return starship_compat.set_dag_is_paused(dag_id, is_paused) + + def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: + return starship_compat.get_dag_runs(dag_id, limit) + + def set_dag_runs(self, dag_runs: list): + raise RuntimeError("Setting local data is not supported") + + def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): + return starship_compat.get_task_instances(dag_id, offset, limit) + + def set_task_instances(self, task_instances: list): + raise RuntimeError("Setting local data is not supported") -logger = getLogger(__name__) - - -def urljoin(base: str, endpoint: str) -> str: - return "/".join((base.rstrip("/"), endpoint.lstrip("/"))) - - -def session_with_retry(retries=3, backoff_factor=2): - sess = requests.Session() - retry = Retry( - total=retries, - backoff_factor=backoff_factor, - status_forcelist=[500, 502, 503, 504], - ) - sess.mount("http://", HTTPAdapter(max_retries=retry)) - sess.mount("https://", HTTPAdapter(max_retries=retry)) - return sess - - -def _request( - type: Literal["get", "post", "put", "patch"], - endpoint, - auth=None, - json=None, - params=None, - headers=None, - retries=3, - backoff_factor=2, -): - s = session_with_retry(retries=retries, backoff_factor=backoff_factor) - request_mapping = {"get": s.get, "post": s.post, "put": s.put, "patch": s.patch} - method = request_mapping.get(type) - if auth: - auth = tuple(auth) - resp = method(endpoint, params=params, json=json, auth=auth, headers=headers) - if resp.status_code != 200: - logger.info( - f"request failed with status {resp.status_code} for {type} on endpoint {endpoint} with text {resp.text}" - ) - return resp - - -class StarshipAPIHook(BaseHook): - DAG_RUNS = "api/starship/dag_runs" - TASK_INSTANCES = "api/starship/task_instances" - DAGS = "api/starship/dags" - - def __init__( - self, - webserver_url, - auth=None, - headers=None, - logger_name: str | None = None, - ): - super().__init__(logger_name) - self.webserver_url = webserver_url - self.auth = auth - self.headers = headers - - # todo: maybe create utility classes? 
def get_dags(self) -> dict: - dags = urljoin(self.webserver_url, StarshipAPIHook.DAGS) - resp = _request("get", endpoint=dags, auth=self.auth, headers=self.headers) - return resp.json() - - def get_dag_runs(self, dag_id, limit=5) -> dict: - dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) - resp = _request( - type="get", - endpoint=dagrun_endpoint, - auth=self.auth, - headers=self.headers, - params={"dag_id": dag_id, "limit": limit}, - ) - return resp.json() - - def set_dag_runs( - self, - dag_runs: list[dict], - ) -> dict: - dagrun_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAG_RUNS) - resp = _request( - type="post", - endpoint=dagrun_endpoint, - auth=self.auth, - headers=self.headers, - json={"dag_runs": dag_runs}, + return starship_compat.get_dags() + + +class StarshipHttpHook(HttpHook, StarshipHook): + conn_name_attr = "http_conn_id" + default_conn_name = "starship_default" + conn_type = "http" + hook_name = "HTTP" + + def get_variables(self): + return ( + self.get_conn() + .get(self.get_connection(self.http_conn_id).url / VARIABLES_ROUTE) + .json() ) - return resp.json() - def get_latest_dagrun_state(self, dag_id) -> str: - latest = self.get_dag_runs( - dag_id=dag_id, - limit=1, + def set_variable(self, **kwargs): + return ( + self.get_conn() + .post( + self.get_connection(self.http_conn_id).url / VARIABLES_ROUTE, + json=kwargs, + ) + .json() ) - logger.info(f"fetching latest dagrun for {dag_id}") - logger.info(f"{latest}") - - return latest["dag_runs"][0]["state"] - - # another reason for class to couple dagrun and task instance retrieval limits - def get_task_instances( - self, - dag_id: str, - limit: int = 5, - ) -> dict: - task_instances = urljoin(self.webserver_url, StarshipAPIHook.TASK_INSTANCES) - resp = _request( - type="get", - endpoint=task_instances, - auth=self.auth, - headers=self.headers, - params={"dag_id": dag_id, "limit": limit}, + + def get_pools(self): + return ( + self.get_conn() + .get(self.get_connection(self.http_conn_id).url / POOLS_ROUTE) + .json() ) - return resp.json() - def set_task_instances(self, task_instances: list[dict]) -> dict: - task_instance_endpoint = urljoin( - self.webserver_url, StarshipAPIHook.TASK_INSTANCES + def set_pool(self, **kwargs): + return ( + self.get_conn() + .post(self.get_connection(self.http_conn_id).url / POOLS_ROUTE, json=kwargs) + .json() ) - resp = _request( - type="post", - endpoint=task_instance_endpoint, - auth=self.auth, - headers=self.headers, - json={"task_instances": task_instances}, + + def set_connection(self, **kwargs): + return ( + self.get_conn() + .post( + self.get_connection(self.http_conn_id).url / CONNECTIONS_ROUTE, + json=kwargs, + ) + .json() ) - return resp.json() - - def set_dag_state( - self, - dag_id: str, - action=Literal["pause", "unpause"], - ) -> requests.Response: - action_dict = {"pause": True, "unpause": False} - is_paused = action_dict[action] - payload = {"dag_id": dag_id, "is_paused": is_paused} - dag_endpoint = urljoin(self.webserver_url, StarshipAPIHook.DAGS) - return _request( - type="patch", - endpoint=dag_endpoint, - auth=self.auth, - headers=self.headers, - json=payload, + + def get_dags(self) -> dict: + return ( + self.get_conn() + .get(self.get_connection(self.http_conn_id).url / DAGS_ROUTE) + .json() ) + def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: + return ( + self.get_conn() + .get( + self.get_connection(self.http_conn_id).url / DAG_RUNS_ROUTE, + params={"dag_id": dag_id, "limit": limit}, + ) + .json() + ) -class 
StarshipDagRunMigrationHook(BaseHook): - def __init__( - self, - source_webserver_url: str, - target_webserver_url: str, - source_auth: Union[tuple, list] = None, - target_auth: Union[tuple, list] = None, - source_headers: dict = None, - target_headers: dict = None, - unpause_dags_in_target=False, - logger_name: str | None = None, - ): - super().__init__(logger_name) - - self.source_api_hook = StarshipAPIHook( - webserver_url=source_webserver_url, auth=source_auth, headers=source_headers + def set_dag_runs(self, dag_runs: List[dict]) -> dict: + return ( + self.get_conn() + .post( + self.get_connection(self.http_conn_id).url / DAG_RUNS_ROUTE, + json={"dag_runs": dag_runs}, + ) + .json() ) - self.target_api_hook = StarshipAPIHook( - webserver_url=target_webserver_url, auth=target_auth, headers=target_headers + + def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): + return ( + self.get_conn() + .get( + self.get_connection(self.http_conn_id).url / TASK_INSTANCES_ROUTE, + params={"dag_id": dag_id, "limit": limit}, + ) + .json() ) - self.unpause_dags_in_target = unpause_dags_in_target - - def load_dagruns_to_target( - self, - dag_ids: list[str] = None, - ) -> None: - if not dag_ids: - dag_ids = [dag["dag_id"] for dag in self.source_api_hook.get_dags()] - - for dag_id in dag_ids: - state = self.source_api_hook.get_latest_dagrun_state(dag_id=dag_id) - if state not in (DagRunState.FAILED, DagRunState.SUCCESS): - logger.info( - dedent( - f"""Latest dagrun for {dag_id} is not not in state - {(DagRunState.FAILED, DagRunState.SUCCESS)}. Skipping migration.""" - ) - ) - else: - self.source_api_hook.set_dag_state( - dag_id=dag_id, - action="pause", - ) - self.get_and_set_dagruns(dag_id) - self.get_and_set_task_instances(dag_id) - - if self.unpause_dags_in_target: - self.target_api_hook.set_dag_state(dag_id=dag_id, action="unpause") - - def get_and_set_dagruns(self, dag_id: str) -> None: - dag_runs = self.source_api_hook.get_dag_runs( - dag_id=dag_id, + + def set_task_instances(self, task_instances: list[dict]) -> dict: + return ( + self.get_conn() + .post( + self.get_connection(self.http_conn_id).url / TASK_INSTANCES_ROUTE, + json={"task_instances": task_instances}, + ) + .json() ) - self.target_api_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"]) - def get_and_set_task_instances(self, dag_id: str) -> None: - task_instances = self.source_api_hook.get_task_instances(dag_id=dag_id) - self.target_api_hook.set_task_instances( - task_instances=task_instances["task_instances"] + def set_dag_is_paused(self, dag_id: str, is_paused: bool): + return ( + self.get_conn() + .patch( + self.get_connection(self.http_conn_id).url / DAGS_ROUTE, + json={"dag_id": dag_id, "is_paused": is_paused}, + ) + .json() ) diff --git a/astronomer_starship/providers/starship/operators/starship.py b/astronomer_starship/providers/starship/operators/starship.py index f464f6e..c38799c 100644 --- a/astronomer_starship/providers/starship/operators/starship.py +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -1,23 +1,225 @@ +""" +Compatability Notes: +- @task() is >=AF2.0 +- @task_group is >=AF2.1.0 +- Dynamic Task Mapping is >=AF2.3.0 +""" +from typing import Any, Union, List + +import airflow +from airflow.decorators import task from airflow.models.baseoperator import BaseOperator +from airflow.utils.context import Context +from airflow.utils.task_group import TaskGroup from astronomer_starship.providers.starship.hooks.starship import ( - StarshipDagRunMigrationHook, + StarshipLocalHook, + 
StarshipHttpHook, ) -class StarshipOperator(BaseOperator): - """ - Migrate dag run and task run history by using this operator as a task in a DAG - """ +class StarshipMigrationOperator(BaseOperator): + def __init__(self, http_conn_id=None, **kwargs): + super().__init__(**kwargs) + self.source_hook = StarshipLocalHook() + self.target_hook = StarshipHttpHook(http_conn_id=http_conn_id) + + +class StarshipVariableMigrationOperator(StarshipMigrationOperator): + def __init__(self, variable_key: Union[List[str], None] = None, **kwargs): + super().__init__(**kwargs) + self.variable_key = variable_key + + def execute(self, context: Context) -> Any: + self.source_hook.get_variables() + # TODO + + +def starship_variables_migration(variables: List[str] = None, **kwargs): + with TaskGroup("variables") as tg: + + @task() + def variables_task(): + _variables = StarshipLocalHook().get_variables() + return ( + _variables + if variables is None + else {k for k in _variables if k in variables} + ) + + variables_output = variables_task() + if airflow.__version__ >= "2.3.0": + ( + StarshipVariableMigrationOperator.partial(**kwargs).expand( + task_id="migrate_variables", variable=variables_output + ) + ) + else: + for variable in variables_output: + ( + variables_output + >> StarshipVariableMigrationOperator( + task_id=f"migrate_variable_{variable}", + variable_key=variable, + **kwargs, + ) + ) + return tg + + +class StarshipPoolMigrationOperator(StarshipMigrationOperator): + def __init__(self, pool_name: Union[List[str], None] = None, **kwargs): + super().__init__(**kwargs) + self.pool_name = pool_name + + def execute(self, context: Context) -> Any: + # TODO + pass + + +def starship_pools_migration(pools: List[str] = None, **kwargs): + with TaskGroup("pools") as tg: + + @task() + def pools_task(): + _pools = StarshipLocalHook().get_pools() + return _pools if pools is None else {k for k in _pools if k in pools} + + pools_output = pools_task() + if airflow.__version__ >= "2.3.0": + ( + StarshipPoolMigrationOperator.partial(**kwargs).expand( + task_id="migrate_pools", variable=pools_output + ) + ) + else: + for pool in pools_output: + ( + pools_output + >> StarshipPoolMigrationOperator( + task_id=f"migrate_pool_{pool}", pool_name=pool, **kwargs + ) + ) + return tg - def __init__(self, hook: StarshipDagRunMigrationHook = None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.hook = hook + +class StarshipConnectionMigrationOperator(StarshipMigrationOperator): + def __init__(self, connection_id: Union[List[str], None] = None, **kwargs): + super().__init__(**kwargs) + self.connection_id = connection_id + + def execute(self, context: Context) -> Any: + # TODO + pass + + +def starship_connections_migration(connections: List[str] = None, **kwargs): + with TaskGroup("connections") as tg: + + @task() + def connections_task(): + _connections = StarshipLocalHook().get_connections() + return ( + _connections + if connections is None + else {k for k in _connections if k in connections} + ) + + connections_output = connections_task() + if airflow.__version__ >= "2.3.0": + ( + StarshipConnectionMigrationOperator.partial(**kwargs).expand( + task_id="migrate_connections", variable=connections_output + ) + ) + else: + for connection in connections_output: + ( + connections_output + >> StarshipConnectionMigrationOperator( + task_id=f"migrate_connection_{connection}", + connection_name=connection, + **kwargs, + ) + ) + return tg + + +class StarshipDagHistoryMigrationOperator(StarshipMigrationOperator): + def __init__( + 
self, + target_dag_id: str, + unpause_dag_in_target: bool = False, + dag_run_limit: int = 10, + **kwargs, + ): + super().__init__(**kwargs) + self.target_dag_id = target_dag_id + self.unpause_dag_in_target = unpause_dag_in_target + self.dag_run_limit = dag_run_limit def execute(self, context): - conf = context["conf"] + self.source_hook.set_dag_is_paused(dag_id=self.target_dag_id, is_paused=True) + # TODO - Poll until all tasks are done + + dag_runs = self.source_hook.get_dag_runs( + dag_id=self.target_dag_id, limit=self.dag_run_limit + ) + task_instances = self.source_hook.get_task_instances( + dag_id=self.target_dag_id, limit=self.dag_run_limit + ) + + self.target_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"]) + self.target_hook.set_task_instances( + task_instances=task_instances["task_instances"] + ) + + if self.unpause_dag_in_target: + self.target_hook.set_dag_is_paused( + dag_id=self.target_dag_id, is_paused=False + ) + + +def starship_dag_history_migration(dag_ids: List[str] = None, **kwargs): + with TaskGroup("dag_history") as tg: + + @task() + def dag_ids_task(): + _dag_ids = StarshipLocalHook().get_dags() + return ( + [k.dag_id for k in _dag_ids] + if dag_ids is None + else [k.dag_id for k in _dag_ids if k in dag_ids] + ) + + dag_ids_output = dag_ids_task() + if airflow.__version__ >= "2.3.0": + ( + StarshipDagHistoryMigrationOperator.partial(**kwargs).expand( + task_id="migrate_dag_ids", variable=dag_ids_output + ) + ) + else: + for dag_id in dag_ids_output: + ( + dag_ids_output + >> StarshipDagHistoryMigrationOperator( + task_id=f"migrate_dag_{dag_id}", target_dag_id=dag_id, **kwargs + ) + ) + return tg - if not self.hook: - self.hook = StarshipDagRunMigrationHook(**conf) - return self.hook.load_dagruns_to_target(dag_ids=conf.get("dag_ids")) +def starship_migration( + variables: List[str] = None, + pools: List[str] = None, + connections: List[str] = None, + dag_ids: List[str] = None, + **kwargs, +): + with TaskGroup("migration") as tg: + starship_variables_migration(variables=variables, **kwargs) + starship_pools_migration(pools=pools, **kwargs) + starship_connections_migration(connections=connections, **kwargs) + starship_dag_history_migration(dag_ids=dag_ids, **kwargs) + return tg diff --git a/pyproject.toml b/pyproject.toml index 821f3db..fe063fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,10 @@ exclude = [ ] [project.optional-dependencies] +provider = [ + "apache-airflow-providers-http" +] + dev = [ # package "twine", From ea9137ba4de40e44b69c5b5f100c952cee3a7fb1 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 31 May 2024 12:03:05 -0400 Subject: [PATCH 19/28] task-group and DAG approach, manually tested --- .../providers/starship/hooks/starship.py | 162 +++++----- .../providers/starship/operators/starship.py | 283 ++++++++++++------ 2 files changed, 269 insertions(+), 176 deletions(-) diff --git a/astronomer_starship/providers/starship/hooks/starship.py b/astronomer_starship/providers/starship/hooks/starship.py index 61d4c48..3b8e2bc 100644 --- a/astronomer_starship/providers/starship/hooks/starship.py +++ b/astronomer_starship/providers/starship/hooks/starship.py @@ -78,128 +78,114 @@ def get_pools(self): def set_pool(self, **kwargs): raise RuntimeError("Setting local data is not supported") + # noinspection PyMethodOverriding + def get_connections(self): + return starship_compat.get_connections() + def set_connection(self, **kwargs): raise RuntimeError("Setting local data is not supported") 
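+    # Illustrative pairing of the two hooks (comments only; "starship_default"
+    # follows the connection id used in the DAG docs, and "example_dag" is a
+    # placeholder):
+    #
+    #   source = StarshipLocalHook()
+    #   target = StarshipHttpHook(http_conn_id="starship_default")
+    #   target.set_dag_runs(dag_runs=source.get_dag_runs("example_dag")["dag_runs"])
+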
+ def get_dags(self) -> dict: + return starship_compat.get_dags() + def set_dag_is_paused(self, dag_id: str, is_paused: bool): return starship_compat.set_dag_is_paused(dag_id, is_paused) def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: - return starship_compat.get_dag_runs(dag_id, limit) + return starship_compat.get_dag_runs(dag_id, offset=offset, limit=limit) def set_dag_runs(self, dag_runs: list): raise RuntimeError("Setting local data is not supported") def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): - return starship_compat.get_task_instances(dag_id, offset, limit) + return starship_compat.get_task_instances(dag_id, offset=offset, limit=limit) def set_task_instances(self, task_instances: list): raise RuntimeError("Setting local data is not supported") - def get_dags(self) -> dict: - return starship_compat.get_dags() - class StarshipHttpHook(HttpHook, StarshipHook): - conn_name_attr = "http_conn_id" - default_conn_name = "starship_default" - conn_type = "http" - hook_name = "HTTP" - def get_variables(self): - return ( - self.get_conn() - .get(self.get_connection(self.http_conn_id).url / VARIABLES_ROUTE) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(VARIABLES_ROUTE) + res = conn.get(url) + res.raise_for_status() + return res.json() def set_variable(self, **kwargs): - return ( - self.get_conn() - .post( - self.get_connection(self.http_conn_id).url / VARIABLES_ROUTE, - json=kwargs, - ) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(VARIABLES_ROUTE) + res = conn.post(url, json=kwargs) + res.raise_for_status() + return res.json() def get_pools(self): - return ( - self.get_conn() - .get(self.get_connection(self.http_conn_id).url / POOLS_ROUTE) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(POOLS_ROUTE) + res = conn.get(url) + res.raise_for_status() + return res.json() def set_pool(self, **kwargs): - return ( - self.get_conn() - .post(self.get_connection(self.http_conn_id).url / POOLS_ROUTE, json=kwargs) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(POOLS_ROUTE) + res = conn.post(url, json=kwargs) + res.raise_for_status() + return res.json() + + # noinspection PyMethodOverriding + def get_connections(self): + conn = self.get_conn() + url = self.url_from_endpoint(CONNECTIONS_ROUTE) + res = conn.get(url) + res.raise_for_status() + return res.json() def set_connection(self, **kwargs): - return ( - self.get_conn() - .post( - self.get_connection(self.http_conn_id).url / CONNECTIONS_ROUTE, - json=kwargs, - ) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(CONNECTIONS_ROUTE) + res = conn.post(url, json=kwargs) + res.raise_for_status() + return res.json() def get_dags(self) -> dict: - return ( - self.get_conn() - .get(self.get_connection(self.http_conn_id).url / DAGS_ROUTE) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(DAGS_ROUTE) + res = conn.get(url) + res.raise_for_status() + return res.json() + + def set_dag_is_paused(self, dag_id: str, is_paused: bool): + conn = self.get_conn() + url = self.url_from_endpoint(DAGS_ROUTE) + res = conn.patch(url, json={"dag_id": dag_id, "is_paused": is_paused}) + res.raise_for_status() + return res.json() def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: - return ( - self.get_conn() - .get( - self.get_connection(self.http_conn_id).url / DAG_RUNS_ROUTE, - params={"dag_id": dag_id, "limit": limit}, - ) - .json() - ) + conn = self.get_conn() + 
url = self.url_from_endpoint(DAG_RUNS_ROUTE) + res = conn.get(url, params={"dag_id": dag_id, "limit": limit}) + res.raise_for_status() + return res.json() def set_dag_runs(self, dag_runs: List[dict]) -> dict: - return ( - self.get_conn() - .post( - self.get_connection(self.http_conn_id).url / DAG_RUNS_ROUTE, - json={"dag_runs": dag_runs}, - ) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(DAG_RUNS_ROUTE) + res = conn.post(url, json={"dag_runs": dag_runs}) + res.raise_for_status() + return res.json() def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): - return ( - self.get_conn() - .get( - self.get_connection(self.http_conn_id).url / TASK_INSTANCES_ROUTE, - params={"dag_id": dag_id, "limit": limit}, - ) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(TASK_INSTANCES_ROUTE) + res = conn.get(url, params={"dag_id": dag_id, "limit": limit}) + res.raise_for_status() + return res.json() def set_task_instances(self, task_instances: list[dict]) -> dict: - return ( - self.get_conn() - .post( - self.get_connection(self.http_conn_id).url / TASK_INSTANCES_ROUTE, - json={"task_instances": task_instances}, - ) - .json() - ) - - def set_dag_is_paused(self, dag_id: str, is_paused: bool): - return ( - self.get_conn() - .patch( - self.get_connection(self.http_conn_id).url / DAGS_ROUTE, - json={"dag_id": dag_id, "is_paused": is_paused}, - ) - .json() - ) + conn = self.get_conn() + url = self.url_from_endpoint(TASK_INSTANCES_ROUTE) + res = conn.post(url, json={"task_instances": task_instances}) + res.raise_for_status() + return res.json() diff --git a/astronomer_starship/providers/starship/operators/starship.py b/astronomer_starship/providers/starship/operators/starship.py index c38799c..0d830d9 100644 --- a/astronomer_starship/providers/starship/operators/starship.py +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -1,13 +1,17 @@ """ Compatability Notes: - @task() is >=AF2.0 -- @task_group is >=AF2.1.0 -- Dynamic Task Mapping is >=AF2.3.0 +- @task_group is >=AF2.1 +- Dynamic Task Mapping is >=AF2.3 +- Dynamic Task Mapping labelling is >=AF2.9 """ +from datetime import datetime from typing import Any, Union, List import airflow +from airflow import DAG from airflow.decorators import task +from airflow.exceptions import AirflowSkipException from airflow.models.baseoperator import BaseOperator from airflow.utils.context import Context from airflow.utils.task_group import TaskGroup @@ -26,121 +30,151 @@ def __init__(self, http_conn_id=None, **kwargs): class StarshipVariableMigrationOperator(StarshipMigrationOperator): - def __init__(self, variable_key: Union[List[str], None] = None, **kwargs): + def __init__(self, variable_key: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.variable_key = variable_key def execute(self, context: Context) -> Any: - self.source_hook.get_variables() - # TODO + print("Getting Variable", self.variable_key) + variables = self.source_hook.get_variables() + variable: Union[dict, None] = ( + [v for v in variables if v["key"] == self.variable_key] or [None] + )[0] + if variable is not None: + print("Migrating Variable", self.variable_key) + self.target_hook.set_variable(**variable) + else: + raise RuntimeError("Variable not found! 
" + self.variable_key) def starship_variables_migration(variables: List[str] = None, **kwargs): with TaskGroup("variables") as tg: @task() - def variables_task(): + def get_variables(): _variables = StarshipLocalHook().get_variables() - return ( - _variables - if variables is None - else {k for k in _variables if k in variables} + + _variables = ( + [k["key"] for k in _variables if k["key"] in variables] + if variables is not None + else [k["key"] for k in _variables] ) - variables_output = variables_task() + if not len(_variables): + raise AirflowSkipException("Nothing to migrate") + return _variables + + variables_results = get_variables() if airflow.__version__ >= "2.3.0": - ( - StarshipVariableMigrationOperator.partial(**kwargs).expand( - task_id="migrate_variables", variable=variables_output - ) - ) + StarshipVariableMigrationOperator.partial( + task_id="migrate_variables", **kwargs + ).expand(variable_key=variables_results) else: - for variable in variables_output: - ( - variables_output - >> StarshipVariableMigrationOperator( - task_id=f"migrate_variable_{variable}", - variable_key=variable, - **kwargs, - ) + for variable in variables_results.output: + variables_results >> StarshipVariableMigrationOperator( + task_id="migrate_variable_" + variable, + variable_key=variable, + **kwargs, ) return tg class StarshipPoolMigrationOperator(StarshipMigrationOperator): - def __init__(self, pool_name: Union[List[str], None] = None, **kwargs): + def __init__(self, pool_name: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.pool_name = pool_name def execute(self, context: Context) -> Any: - # TODO - pass + print("Getting Pool", self.pool_name) + pool: Union[dict, None] = ( + [v for v in self.source_hook.get_pools() if v["name"] == self.pool_name] + or [None] + )[0] + if pool is not None: + print("Migrating Pool", self.pool_name) + self.target_hook.set_pool(**pool) + else: + raise RuntimeError("Pool not found!") def starship_pools_migration(pools: List[str] = None, **kwargs): with TaskGroup("pools") as tg: @task() - def pools_task(): + def get_pools(): _pools = StarshipLocalHook().get_pools() - return _pools if pools is None else {k for k in _pools if k in pools} + _pools = ( + [k["name"] for k in _pools if k["name"] in pools] + if pools is not None + else [k["name"] for k in _pools] + ) + + if not len(_pools): + raise AirflowSkipException("Nothing to migrate") + return _pools - pools_output = pools_task() + pools_result = get_pools() if airflow.__version__ >= "2.3.0": - ( - StarshipPoolMigrationOperator.partial(**kwargs).expand( - task_id="migrate_pools", variable=pools_output - ) - ) + StarshipPoolMigrationOperator.partial( + task_id="migrate_pools", **kwargs + ).expand(pool_name=pools_result) else: - for pool in pools_output: - ( - pools_output - >> StarshipPoolMigrationOperator( - task_id=f"migrate_pool_{pool}", pool_name=pool, **kwargs - ) + for pool in pools_result.output: + pools_result >> StarshipPoolMigrationOperator( + task_id="migrate_pool_" + pool, pool_name=pool, **kwargs ) return tg class StarshipConnectionMigrationOperator(StarshipMigrationOperator): - def __init__(self, connection_id: Union[List[str], None] = None, **kwargs): + def __init__(self, connection_id: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.connection_id = connection_id def execute(self, context: Context) -> Any: - # TODO - pass + print("Getting Connection", self.connection_id) + connection: Union[dict, None] = ( + [ + v + for v in self.source_hook.get_connections() + if 
v["conn_id"] == self.connection_id + ] + or [None] + )[0] + if connection is not None: + print("Migrating Connection", self.connection_id) + self.target_hook.set_connection(**connection) + else: + raise RuntimeError("Connection not found!") def starship_connections_migration(connections: List[str] = None, **kwargs): with TaskGroup("connections") as tg: @task() - def connections_task(): + def get_connections(): _connections = StarshipLocalHook().get_connections() - return ( - _connections - if connections is None - else {k for k in _connections if k in connections} + _connections = ( + [k["conn_id"] for k in _connections if k["conn_id"] in connections] + if connections is not None + else [k["conn_id"] for k in _connections] ) - connections_output = connections_task() + if not len(_connections): + raise AirflowSkipException("Nothing to migrate") + return _connections + + connections_result = get_connections() if airflow.__version__ >= "2.3.0": - ( - StarshipConnectionMigrationOperator.partial(**kwargs).expand( - task_id="migrate_connections", variable=connections_output - ) - ) + StarshipConnectionMigrationOperator.partial( + task_id="migrate_connections", **kwargs + ).expand(connection_id=connections_result) else: - for connection in connections_output: - ( - connections_output - >> StarshipConnectionMigrationOperator( - task_id=f"migrate_connection_{connection}", - connection_name=connection, - **kwargs, - ) + for connection in connections_result.output: + connections_result >> StarshipConnectionMigrationOperator( + task_id="migrate_connection_" + connection.conn_id, + connection_id=connection, + **kwargs, ) return tg @@ -159,22 +193,36 @@ def __init__( self.dag_run_limit = dag_run_limit def execute(self, context): + print("Pausing local DAG for", self.target_dag_id) self.source_hook.set_dag_is_paused(dag_id=self.target_dag_id, is_paused=True) # TODO - Poll until all tasks are done + print("Getting local DAG Runs for", self.target_dag_id) dag_runs = self.source_hook.get_dag_runs( dag_id=self.target_dag_id, limit=self.dag_run_limit ) + if len(dag_runs["dag_runs"]) == 0: + raise AirflowSkipException("No DAG Runs found for " + self.target_dag_id) + + print("Getting local Task Instances for", self.target_dag_id) task_instances = self.source_hook.get_task_instances( dag_id=self.target_dag_id, limit=self.dag_run_limit ) + if len(task_instances["task_instances"]) == 0: + raise AirflowSkipException( + "No Task Instances found for " + self.target_dag_id + ) + print("Setting target DAG Runs for", self.target_dag_id) self.target_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"]) + + print("Setting target Task Instances for", self.target_dag_id) self.target_hook.set_task_instances( task_instances=task_instances["task_instances"] ) if self.unpause_dag_in_target: + print("Unpausing target DAG for", self.target_dag_id) self.target_hook.set_dag_is_paused( dag_id=self.target_dag_id, is_paused=False ) @@ -184,42 +232,101 @@ def starship_dag_history_migration(dag_ids: List[str] = None, **kwargs): with TaskGroup("dag_history") as tg: @task() - def dag_ids_task(): - _dag_ids = StarshipLocalHook().get_dags() - return ( - [k.dag_id for k in _dag_ids] - if dag_ids is None - else [k.dag_id for k in _dag_ids if k in dag_ids] + def get_dags(): + _dags = StarshipLocalHook().get_dags() + _dags = ( + [ + k["dag_id"] + for k in _dags + if k["dag_id"] in dag_ids + and k["dag_id"] != "StarshipAirflowMigrationDAG" + ] + if dag_ids is not None + else [ + k["dag_id"] + for k in _dags + if k["dag_id"] != 
"StarshipAirflowMigrationDAG" + ] ) - dag_ids_output = dag_ids_task() + if not len(_dags): + raise AirflowSkipException("Nothing to migrate") + return _dags + + dags_result = get_dags() if airflow.__version__ >= "2.3.0": - ( - StarshipDagHistoryMigrationOperator.partial(**kwargs).expand( - task_id="migrate_dag_ids", variable=dag_ids_output - ) - ) + StarshipDagHistoryMigrationOperator.partial( + task_id="migrate_dag_ids", + **( + {"map_index_template": "{{ task.target_dag_id }}"} + if airflow.__version__ >= "2.9.0" + else {} + ), + **kwargs, + ).expand(target_dag_id=dags_result) else: - for dag_id in dag_ids_output: - ( - dag_ids_output - >> StarshipDagHistoryMigrationOperator( - task_id=f"migrate_dag_{dag_id}", target_dag_id=dag_id, **kwargs - ) + for dag_id in dags_result.output: + dags_result >> StarshipDagHistoryMigrationOperator( + task_id="migrate_dag_" + dag_id, target_dag_id=dag_id, **kwargs ) return tg -def starship_migration( +# noinspection PyPep8Naming +def StarshipMigrationDAG( + http_conn_id: str, variables: List[str] = None, pools: List[str] = None, connections: List[str] = None, dag_ids: List[str] = None, **kwargs, ): - with TaskGroup("migration") as tg: - starship_variables_migration(variables=variables, **kwargs) - starship_pools_migration(pools=pools, **kwargs) - starship_connections_migration(connections=connections, **kwargs) - starship_dag_history_migration(dag_ids=dag_ids, **kwargs) - return tg + dag = DAG( + dag_id="StarshipAirflowMigrationDAG", + schedule="@once", + start_date=datetime(1970, 1, 1), + tags=["migration", "starship"], + default_args={"owner": "Astronomer"}, + doc_md=""" + # Starship Migration DAG + A DAG to migrate Airflow Variables, Pools, Connections, and DAG History from one Airflow instance to another. + + ## Usage: + ```python + from astronomer_starship.providers.starship.operators.starship import ( + StarshipMigrationDAG, + ) + + # Make a connection in Airflow with the following details: + import os + + os.environ["AIRFLOW_CONN_STARSHIP_DEFAULT"] = ( + "{" + ' "conn_id": "starship_default", ' + ' "host": "", "port": 443, "schema": "https", ' + ' "extras": {"Authorization": "Bearer "}' + "}" + ) + + globals()["StarshipAirflowMigrationDAG"] = StarshipMigrationDAG( + http_conn_id="starship_default", + variables=["var1", "var2"], # or None to migrate all, or empty list to skip + pools=["pool1", "pool2"], # or None to migrate all, or empty list to skip + connections=["conn1", "conn2"], # or None to migrate all, or empty list to skip + dag_ids=["dag1", "dag2"], # or None to migrate all, or empty list to skip + ) + ``` + """, + ) + with dag: + starship_variables_migration( + variables=variables, http_conn_id=http_conn_id, **kwargs + ) + starship_pools_migration(pools=pools, http_conn_id=http_conn_id, **kwargs) + starship_connections_migration( + connections=connections, http_conn_id=http_conn_id, **kwargs + ) + starship_dag_history_migration( + dag_ids=dag_ids, http_conn_id=http_conn_id, **kwargs + ) + return dag From 5d90ede3449fcf2f763982f18f721b4e87da923e Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 31 May 2024 14:34:39 -0400 Subject: [PATCH 20/28] add docs. 
Doc API errors --- .../providers/starship/hooks/starship.py | 62 ++++++++++ .../providers/starship/operators/starship.py | 71 +++++++----- docs/api.md | 15 +++ docs/index.md | 2 +- docs/operator.md | 107 ++++++++++-------- starship.png => docs/starship.png | Bin starship.svg => docs/starship.svg | 0 .../starship_diagram.svg | 0 mkdocs.yml | 6 + 9 files changed, 187 insertions(+), 76 deletions(-) rename starship.png => docs/starship.png (100%) rename starship.svg => docs/starship.svg (100%) rename starship_diagram.svg => docs/starship_diagram.svg (100%) diff --git a/astronomer_starship/providers/starship/hooks/starship.py b/astronomer_starship/providers/starship/hooks/starship.py index 3b8e2bc..bccb208 100644 --- a/astronomer_starship/providers/starship/hooks/starship.py +++ b/astronomer_starship/providers/starship/hooks/starship.py @@ -1,3 +1,6 @@ +""" +Hooks for interacting with Starship migrations +""" from abc import ABC, abstractmethod from typing import List @@ -66,13 +69,21 @@ def set_task_instances(self, task_instances: list): class StarshipLocalHook(BaseHook, StarshipHook): + """Hook to retrieve local Airflow data, which can then be sent to the Target Starship instance.""" + def get_variables(self): + """ + Get all variables from the local Airflow instance. + """ return starship_compat.get_variables() def set_variable(self, **kwargs): raise RuntimeError("Setting local data is not supported") def get_pools(self): + """ + Get all pools from the local Airflow instance. + """ return starship_compat.get_pools() def set_pool(self, **kwargs): @@ -80,24 +91,39 @@ def set_pool(self, **kwargs): # noinspection PyMethodOverriding def get_connections(self): + """ + Get all connections from the local Airflow instance. + """ return starship_compat.get_connections() def set_connection(self, **kwargs): raise RuntimeError("Setting local data is not supported") def get_dags(self) -> dict: + """ + Get all DAGs from the local Airflow instance. + """ return starship_compat.get_dags() def set_dag_is_paused(self, dag_id: str, is_paused: bool): + """ + Set the paused status of a DAG in the local Airflow instance. + """ return starship_compat.set_dag_is_paused(dag_id, is_paused) def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: + """ + Get DAG runs from the local Airflow instance. + """ return starship_compat.get_dag_runs(dag_id, offset=offset, limit=limit) def set_dag_runs(self, dag_runs: list): raise RuntimeError("Setting local data is not supported") def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): + """ + Get task instances from the local Airflow instance. + """ return starship_compat.get_task_instances(dag_id, offset=offset, limit=limit) def set_task_instances(self, task_instances: list): @@ -106,6 +132,9 @@ def set_task_instances(self, task_instances: list): class StarshipHttpHook(HttpHook, StarshipHook): def get_variables(self): + """ + Get all variables from the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(VARIABLES_ROUTE) res = conn.get(url) @@ -113,6 +142,9 @@ def get_variables(self): return res.json() def set_variable(self, **kwargs): + """ + Set a variable in the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(VARIABLES_ROUTE) res = conn.post(url, json=kwargs) @@ -120,6 +152,9 @@ def set_variable(self, **kwargs): return res.json() def get_pools(self): + """ + Get all pools from the Target Starship instance. 
+ """ conn = self.get_conn() url = self.url_from_endpoint(POOLS_ROUTE) res = conn.get(url) @@ -127,6 +162,9 @@ def get_pools(self): return res.json() def set_pool(self, **kwargs): + """ + Set a pool in the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(POOLS_ROUTE) res = conn.post(url, json=kwargs) @@ -135,6 +173,9 @@ def set_pool(self, **kwargs): # noinspection PyMethodOverriding def get_connections(self): + """ + Get all connections from the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(CONNECTIONS_ROUTE) res = conn.get(url) @@ -142,6 +183,9 @@ def get_connections(self): return res.json() def set_connection(self, **kwargs): + """ + Set a connection in the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(CONNECTIONS_ROUTE) res = conn.post(url, json=kwargs) @@ -149,6 +193,9 @@ def set_connection(self, **kwargs): return res.json() def get_dags(self) -> dict: + """ + Get all DAGs from the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(DAGS_ROUTE) res = conn.get(url) @@ -156,6 +203,9 @@ def get_dags(self) -> dict: return res.json() def set_dag_is_paused(self, dag_id: str, is_paused: bool): + """ + Set the paused status of a DAG in the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(DAGS_ROUTE) res = conn.patch(url, json={"dag_id": dag_id, "is_paused": is_paused}) @@ -163,6 +213,9 @@ def set_dag_is_paused(self, dag_id: str, is_paused: bool): return res.json() def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: + """ + Get DAG runs from the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(DAG_RUNS_ROUTE) res = conn.get(url, params={"dag_id": dag_id, "limit": limit}) @@ -170,6 +223,9 @@ def get_dag_runs(self, dag_id: str, offset: int = 0, limit: int = 10) -> dict: return res.json() def set_dag_runs(self, dag_runs: List[dict]) -> dict: + """ + Set DAG runs in the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(DAG_RUNS_ROUTE) res = conn.post(url, json={"dag_runs": dag_runs}) @@ -177,6 +233,9 @@ def set_dag_runs(self, dag_runs: List[dict]) -> dict: return res.json() def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): + """ + Get task instances from the Target Starship instance. + """ conn = self.get_conn() url = self.url_from_endpoint(TASK_INSTANCES_ROUTE) res = conn.get(url, params={"dag_id": dag_id, "limit": limit}) @@ -184,6 +243,9 @@ def get_task_instances(self, dag_id: str, offset: int = 0, limit: int = 10): return res.json() def set_task_instances(self, task_instances: list[dict]) -> dict: + """ + Set task instances in the Target Starship instance. 
+ """ conn = self.get_conn() url = self.url_from_endpoint(TASK_INSTANCES_ROUTE) res = conn.post(url, json={"task_instances": task_instances}) diff --git a/astronomer_starship/providers/starship/operators/starship.py b/astronomer_starship/providers/starship/operators/starship.py index 0d830d9..36549c8 100644 --- a/astronomer_starship/providers/starship/operators/starship.py +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -1,10 +1,4 @@ -""" -Compatability Notes: -- @task() is >=AF2.0 -- @task_group is >=AF2.1 -- Dynamic Task Mapping is >=AF2.3 -- Dynamic Task Mapping labelling is >=AF2.9 -""" +"""Operators, TaskGroups, and DAGs for interacting with the Starship migrations.""" from datetime import datetime from typing import Any, Union, List @@ -21,6 +15,12 @@ StarshipHttpHook, ) +# Compatability Notes: +# - @task() is >=AF2.0 +# - @task_group is >=AF2.1 +# - Dynamic Task Mapping is >=AF2.3 +# - Dynamic Task Mapping labelling is >=AF2.9 + class StarshipMigrationOperator(BaseOperator): def __init__(self, http_conn_id=None, **kwargs): @@ -30,6 +30,8 @@ def __init__(self, http_conn_id=None, **kwargs): class StarshipVariableMigrationOperator(StarshipMigrationOperator): + """Operator to migrate a single Variable from one Airflow instance to another.""" + def __init__(self, variable_key: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.variable_key = variable_key @@ -48,6 +50,7 @@ def execute(self, context: Context) -> Any: def starship_variables_migration(variables: List[str] = None, **kwargs): + """TaskGroup to fetch and migrate Variables from one Airflow instance to another.""" with TaskGroup("variables") as tg: @task() @@ -80,6 +83,8 @@ def get_variables(): class StarshipPoolMigrationOperator(StarshipMigrationOperator): + """Operator to migrate a single Pool from one Airflow instance to another.""" + def __init__(self, pool_name: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.pool_name = pool_name @@ -98,6 +103,7 @@ def execute(self, context: Context) -> Any: def starship_pools_migration(pools: List[str] = None, **kwargs): + """TaskGroup to fetch and migrate Pools from one Airflow instance to another.""" with TaskGroup("pools") as tg: @task() @@ -127,6 +133,8 @@ def get_pools(): class StarshipConnectionMigrationOperator(StarshipMigrationOperator): + """Operator to migrate a single Connection from one Airflow instance to another.""" + def __init__(self, connection_id: Union[str, None] = None, **kwargs): super().__init__(**kwargs) self.connection_id = connection_id @@ -149,6 +157,7 @@ def execute(self, context: Context) -> Any: def starship_connections_migration(connections: List[str] = None, **kwargs): + """TaskGroup to fetch and migrate Connections from one Airflow instance to another.""" with TaskGroup("connections") as tg: @task() @@ -180,6 +189,8 @@ def get_connections(): class StarshipDagHistoryMigrationOperator(StarshipMigrationOperator): + """Operator to migrate a single DAG from one Airflow instance to another, with it's history.""" + def __init__( self, target_dag_id: str, @@ -229,6 +240,7 @@ def execute(self, context): def starship_dag_history_migration(dag_ids: List[str] = None, **kwargs): + """TaskGroup to fetch and migrate DAGs with their history from one Airflow instance to another.""" with TaskGroup("dag_history") as tg: @task() @@ -273,7 +285,7 @@ def get_dags(): # noinspection PyPep8Naming -def StarshipMigrationDAG( +def StarshipAirflowMigrationDAG( http_conn_id: str, variables: List[str] = None, pools: List[str] = 
None, @@ -281,8 +293,11 @@ def StarshipMigrationDAG( dag_ids: List[str] = None, **kwargs, ): + """ + DAG to fetch and migrate Variables, Pools, Connections, and DAGs with history from one Airflow instance to another. + """ dag = DAG( - dag_id="StarshipAirflowMigrationDAG", + dag_id="starship_airflow_migration_dag", schedule="@once", start_date=datetime(1970, 1, 1), tags=["migration", "starship"], @@ -291,32 +306,34 @@ def StarshipMigrationDAG( # Starship Migration DAG A DAG to migrate Airflow Variables, Pools, Connections, and DAG History from one Airflow instance to another. + You can use this DAG to migrate all items, or specific items by providing a list of names. + + You can skip migration by providing an empty list. + + ## Setup: + Make a connection in Airflow with the following details: + - **Conn ID**: `starship_default` + - **Conn Type**: `HTTP` + - **Host**: the URL of the homepage of Airflow (excluding `/home` on the end of the URL) + - For example, if your deployment URL is `https://astronomer.astronomer.run/abcdt4ry/home`, you'll use `https://astronomer.astronomer.run/abcdt4ry` + - **Schema**: `https` + - **Extras**: `{"Authorization": "Bearer "}` + ## Usage: ```python from astronomer_starship.providers.starship.operators.starship import ( - StarshipMigrationDAG, - ) - - # Make a connection in Airflow with the following details: - import os - - os.environ["AIRFLOW_CONN_STARSHIP_DEFAULT"] = ( - "{" - ' "conn_id": "starship_default", ' - ' "host": "", "port": 443, "schema": "https", ' - ' "extras": {"Authorization": "Bearer "}' - "}" + StarshipAirflowMigrationDAG, ) - globals()["StarshipAirflowMigrationDAG"] = StarshipMigrationDAG( + globals()["starship_airflow_migration_dag"] = StarshipAirflowMigrationDAG( http_conn_id="starship_default", - variables=["var1", "var2"], # or None to migrate all, or empty list to skip - pools=["pool1", "pool2"], # or None to migrate all, or empty list to skip - connections=["conn1", "conn2"], # or None to migrate all, or empty list to skip - dag_ids=["dag1", "dag2"], # or None to migrate all, or empty list to skip + variables=None, # None to migrate all, or ["var1", "var2"] to migrate specific items, or empty list to skip all + pools=None, # None to migrate all, or ["pool1", "pool2"] to migrate specific items, or empty list to skip all + connections=None, # None to migrate all, or ["conn1", "conn2"] to migrate specific items, or empty list to skip all + dag_ids=None, # None to migrate all, or ["dag1", "dag2"] to migrate specific items, or empty list to skip all ) ``` - """, + """, # noqa: E501 ) with dag: starship_variables_migration( diff --git a/docs/api.md b/docs/api.md index 68cb652..012ab4f 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,4 +1,19 @@ # API + +## Error Responses +In the event of an error, the API will return a JSON response with an `error` key +and an HTTP `status_code`. The `error` key will contain a message describing the error. 
+ +| **Type** | **Status Code** | **Response Example** | +|-----------------------------------|-----------------|---------------------------------------------------------------------------------------------| +| **Request kwargs - RuntimeError** | 400 | ```{"error": "..."}``` | +| **Request kwargs - Exception** | 500 | ```{"error": "Unknown Error in kwargs_fn - ..."}``` | +| **Unknown Error** | 500 | ```{"error": "Unknown Error", "error_type": ..., "error_message": ..., "kwargs": ...}``` | +| **`POST` Integrity Error** | 409 | ```{"error": "Integrity Error (Duplicate Record?)", "error_message": ..., "kwargs": ...}``` | +| **`POST` Data Error** | 400 | ```{"error": "Data Error", "error_message": ..., "kwargs": ...}``` | +| **`POST` SQL Error** | 400 | ```{"error": "SQL Error", "error_message": ..., "kwargs": ...}``` | + + ## Airflow Version ::: astronomer_starship.starship_api.StarshipApi.airflow_version options: diff --git a/docs/index.md b/docs/index.md index b0536f2..bbf3b9b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,7 @@ Connections, Environment Variables, Pools, and DAG History between two Airflow i

[docs/index.md hunk: the "Logo of Spaceship" image reference is updated to follow the starship.svg/starship.png assets into docs/]

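The error table added to `docs/api.md` above maps directly onto client-side handling: every failure mode returns JSON with an `error` key plus an HTTP status code, so a caller can branch on the status first and fall back to the `error` key. A minimal sketch with the `requests` library, posting to the task-instances route used throughout this series — the deployment URL and token are placeholders, not values from these patches:

```python
import requests

# Placeholders -- substitute your own deployment URL and API token.
WEBSERVER_URL = "https://astronomer.astronomer.run/abcdt4ry"
TOKEN = "<your-token>"

resp = requests.post(
    f"{WEBSERVER_URL}/api/starship/task_instances",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"task_instances": []},  # filled with task instances pulled from the source
)
body = resp.json()  # assumes the endpoint answered with JSON, per the table above
if resp.status_code == 409:
    # POST Integrity Error: the record already exists in the target
    print("Duplicate record:", body.get("error_message"))
elif "error" in body:
    # 400/500 responses carry an `error` key describing the failure
    raise RuntimeError(f"HTTP {resp.status_code}: {body['error']}")
```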
diff --git a/docs/operator.md b/docs/operator.md
index e685a9d..8edc2d3 100644
--- a/docs/operator.md
+++ b/docs/operator.md
@@ -1,69 +1,80 @@
-The Starship Operator should be used in instances where the Airflow Webserver is unable to correctly host a Plugin.
+# Starship Migration DAG
+The `StarshipAirflowMigrationDAG` can be used to migrate Airflow Variables, Pools, Connections,
+and DAG History from one Airflow instance to another.
 
-The `AstroMigrationOperator` should be used if migrating from a
+The `StarshipAirflowMigrationDAG` should be used in instances where the **source** Airflow Webserver
+is unable to correctly host a Plugin. The Target must still have a functioning Starship Plugin installed,
+be running the same version of Airflow, and have the same set of DAGs deployed.
+
+The `StarshipAirflowMigrationDAG` should be used if migrating from a
 Google Cloud Composer 1 (with Airflow 2.x) or MWAA v2.0.2 environment.
-These environments do not support webserver plugins and will require using the `AstroMigrationOperator`
+These environments do not support webserver plugins and will require using the `StarshipAirflowMigrationDAG`
 to migrate data.
 
 ## Installation
 
 Add the following line to your `requirements.txt` in your source environment:
 ```
-    astronomer-starship==1.2.1
+    astronomer-starship
 ```
 
+## Setup
+Make a connection in Airflow with the following details:
+- **Conn ID**: `starship_default`
+- **Conn Type**: `HTTP`
+- **Host**: the URL of the homepage of Airflow (excluding `/home` on the end of the URL)
+    - For example, if your deployment URL is `https://astronomer.astronomer.run/abcdt4ry/home`, you'll use `https://astronomer.astronomer.run/abcdt4ry`
+- **Schema**: `https`
+- **Extras**: `{"Authorization": "Bearer "}`
+
 ## Usage
 
 1. Add the following DAG to your source environment:
 
-    ```python title="dags/astronomer_migration_dag.py"
-    from airflow import DAG
+    ```python title="dags/starship_airflow_migration_dag.py"
+    from astronomer_starship.providers.starship.operators.starship import StarshipAirflowMigrationDAG
 
-    from astronomer.starship.operators import AstroMigrationOperator
-    from datetime import datetime
+    globals()['starship_airflow_migration_dag'] = StarshipAirflowMigrationDAG(http_conn_id="starship_default")
+    ```
 
-    with DAG(
-        dag_id="astronomer_migration_dag",
-        start_date=datetime(2020, 8, 15),
-        schedule_interval=None,
-    ) as dag:
+2. Unpause the DAG in the Airflow UI
+3. Once the DAG successfully runs, your Variables, Pools, Connections, and DAG History should all be migrated to Astronomer
 
-        AstroMigrationOperator(
-            task_id="export_meta",
-            deployment_url='{{ dag_run.conf["deployment_url"] }}',
-            token='{{ dag_run.conf["astro_token"] }}',
-        )
-    ```
+## Configuration
 
-3. Deploy this DAG to your source Airflow environment, configured as described in the **Configuration** section below
-4.
Once the DAG is available in the Airflow UI, click the "Trigger DAG" button, then click "Trigger DAG w/ config", and input the following in the configuration dictionary: - - `astro_token`: To retrieve anf Astronomer token, navigate to [cloud.astronomer.io/token](https://cloud.astronomer.io/token) and log in using your Astronomer credentials - - `deployment_url`: To retrieve a deployment URL - navigate to the Astronomer Airlow deployment that you'd like to migrate to in the Astronomer UI, click `Open Airflow` and copy the page URL (excluding `/home` on the end of the URL) - - For example, if your deployment URL is `https://astronomer.astronomer.run/abcdt4ry/home`, you'll use `https://astronomer.astronomer.run/abcdt4ry` - - The config dictionary used when triggering the DAG should be formatted as: - - ```json - { - "deployment_url": "your-deployment-url", - "astro_token": "your-astro-token" - } - ``` -5. Once the DAG successfully runs, your connections, variables, and environment variables should all be migrated to Astronomer +The `StarshipAirflowMigrationDAG` can be configured as follows: -### Configuration +```python +StarshipAirflowMigrationDAG( + http_conn_id="starship_default", + variables=None, # None to migrate all, or ["var1", "var2"] to migrate specific items, or empty list to skip all + pools=None, # None to migrate all, or ["pool1", "pool2"] to migrate specific items, or empty list to skip all + connections=None, # None to migrate all, or ["conn1", "conn2"] to migrate specific items, or empty list to skip all + dag_ids=None, # None to migrate all, or ["dag1", "dag2"] to migrate specific items, or empty list to skip all +) +``` -The `AstroMigrationOperator` can be configured as follows: +You can use this DAG to migrate all items, or specific items by providing a list of names. -- `variables_exclude_list`: List the individual Airflow Variables which you **do not** want to be migrated. Any Variables not listed will be migrated to the desination Airflow deployment. -- `connection_exclude_list`: List the individual Airflow Connections which you **do not** want to be migrated. Any Variables not listed will be migrated to the desination Airflow deployment. -- `env_include_list`: List the individual Environment Variables which you **do** want to be migrated. Only the Environment Variables listed will be migrated to the desination Airflow deployment. None are migrated by default. +You can skip migration by providing an empty list. 
- ```python - AstroMigrationOperator( - task_id="export_meta", - deployment_url='{{ dag_run.conf["deployment_url"] }}', - token='{{ dag_run.conf["astro_token"] }}', - variables_exclude_list=["some_var_1"], - connection_exclude_list=["some_conn_1"], - env_include_list=["FOO", "BAR"], - ) - ``` +## Python API + +### Hooks + +::: astronomer_starship.providers.starship.hooks.starship + options: + heading_level: 4 + show_root_toc_entry: false + show_root_heading: false + inherited_members: true + show_source: false + +### Operators, TaskGroups, DAG + +::: astronomer_starship.providers.starship.operators.starship + options: + heading_level: 4 + show_root_toc_entry: false + show_root_heading: false + inherited_members: true + show_source: false diff --git a/starship.png b/docs/starship.png similarity index 100% rename from starship.png rename to docs/starship.png diff --git a/starship.svg b/docs/starship.svg similarity index 100% rename from starship.svg rename to docs/starship.svg diff --git a/starship_diagram.svg b/docs/starship_diagram.svg similarity index 100% rename from starship_diagram.svg rename to docs/starship_diagram.svg diff --git a/mkdocs.yml b/mkdocs.yml index 94619b2..f8d16ce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,6 +6,9 @@ site_url: https://astronomer.github.io/starship/ theme: name: material + logo: starship.svg + favicon: starship.png + palette: # Palette toggle for light mode - media: "(prefers-color-scheme: light)" @@ -45,6 +48,9 @@ theme: - search.suggest markdown_extensions: +- toc: + permalink: true + - pymdownx.superfences - pymdownx.highlight: use_pygments: true From 4e40ca54ec28b84db2f844297c3de2d689068e46 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 31 May 2024 14:39:05 -0400 Subject: [PATCH 21/28] remove test --- tests/operator_test.py | 106 ----------------------------------------- 1 file changed, 106 deletions(-) delete mode 100644 tests/operator_test.py diff --git a/tests/operator_test.py b/tests/operator_test.py deleted file mode 100644 index 43a35b9..0000000 --- a/tests/operator_test.py +++ /dev/null @@ -1,106 +0,0 @@ -import datetime -import os -from airflow import DAG -import pytest -from astronomer_starship.compat.starship_compatability import ( - StarshipAirflow, - get_test_data, -) -from astronomer_starship.compat.starship_hook import StarshipAPIHook -from astronomer_starship.compat.starship_operator import StarshipOperator -from tests.conftest import manual_tests -from tests.api_integration_test import ( - get_extras, -) - - -@pytest.fixture -def starship_hook_and_starship( - url_and_token_and_starship, -) -> tuple[StarshipAPIHook, StarshipAirflow]: - (url, token, starship) = url_and_token_and_starship - return StarshipAPIHook(webserver_url=url, **get_extras(url, token)), starship - - -def get_json_test_data(attrs, method=None): - test_data = get_test_data(attrs=attrs, method=method) - json_serializable = { - k: v if not isinstance(v, datetime.datetime) else v.isoformat() - for k, v in test_data.items() - } - return json_serializable - - -class TestStarshipApiHook: - @manual_tests - def test_get_dags(self, starship_hook_and_starship): - hook, _ = starship_hook_and_starship - dags = hook.get_dags() - assert len(dags) > 0, dags - - @manual_tests - def test_set_and_get_dag_runs(self, starship_hook_and_starship): - hook, starship = starship_hook_and_starship - post_payload = get_json_test_data(method="POST", attrs=starship.dag_run_attrs()) - set_runs = 
hook.set_dag_runs(dag_runs=[post_payload]) - assert set_runs - assert ( - "dag_id" in set_runs - or set_runs["error"] == "Integrity Error (Duplicate Record?)" - ) - get_runs = hook.get_dag_runs(dag_id=post_payload["dag_id"]) - assert get_runs, get_runs == post_payload - - @manual_tests - def test_set_and_get_task_instances(self, starship_hook_and_starship): - hook, starship = starship_hook_and_starship - post_payload = get_json_test_data( - method="POST", attrs=starship.task_instance_attrs() - ) - set_tis = hook.set_task_instances(task_instances=[post_payload]) - assert set_tis - assert ( - "task_instances" in set_tis - or set_tis["error"] == "Integrity Error (Duplicate Record?)" - ) - get_tis = hook.get_task_instances(dag_id=post_payload["dag_id"], limit=1) - assert "dag_run_count" in get_tis, get_tis - assert len(get_tis["task_instances"]) == 1 - - @manual_tests - @pytest.mark.parametrize("action", ["unpause", "pause"]) - def test_patch_dag_state(self, starship_hook_and_starship, action): - hook, _ = starship_hook_and_starship - example_dag = hook.get_dags()[0]["dag_id"] - resp = hook.set_dag_state(dag_id=example_dag, action=action) - assert resp.status_code == 200, "dag_id" in resp.json() - - @manual_tests - def test_get_latest_dagrun_state(self, starship_hook_and_starship): - hook, starship = starship_hook_and_starship - example_dag_run = get_test_data(starship.dag_run_attrs()) - latest_state = hook.get_latest_dagrun_state(dag_id=example_dag_run["dag_id"]) - assert latest_state == example_dag_run["state"] - - -@manual_tests -def test_starship_migration_operator(): - dag = DAG("test_dag", default_args={}) - starship_operator = StarshipOperator( - task_id="test_operator", - dag=dag, - ) - dagrun_conf = { - "source_webserver_url": "http://localhost:8080", - "source_auth": ["admin", "admin"], - "target_webserver_url": os.getenv("TARGET_WEBSERVER_URL"), - "target_headers": ( - {"Authorization": f"Bearer {os.getenv('TARGET_TOKEN')}"} - if os.getenv("TARGET_TOKEN") - else None - ), - "target_auth": os.getenv("TARGET_AUTH"), - } - - ctx = {"conf": dagrun_conf} - starship_operator.execute(ctx) From 465f3c7f8498778062325c3b7d83d6b98cc5f1de Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 31 May 2024 14:44:48 -0400 Subject: [PATCH 22/28] bump to 2.0.4 --- astronomer_starship/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astronomer_starship/__init__.py b/astronomer_starship/__init__.py index 1abd8a1..ea2396a 100644 --- a/astronomer_starship/__init__.py +++ b/astronomer_starship/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.3" +__version__ = "2.0.4" def get_provider_info(): From 403fd930e44c7941fc2c962d265e017e3fa89215 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:36:16 -0400 Subject: [PATCH 23/28] swap print -> logging.info --- .../providers/starship/operators/starship.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/astronomer_starship/providers/starship/operators/starship.py b/astronomer_starship/providers/starship/operators/starship.py index 36549c8..629a88f 100644 --- a/astronomer_starship/providers/starship/operators/starship.py +++ b/astronomer_starship/providers/starship/operators/starship.py @@ -1,4 +1,5 @@ """Operators, TaskGroups, and DAGs for interacting with the Starship migrations.""" +import logging from datetime import datetime from typing import Any, Union, List @@ 
-37,13 +38,13 @@ def __init__(self, variable_key: Union[str, None] = None, **kwargs):
         self.variable_key = variable_key
 
     def execute(self, context: Context) -> Any:
-        print("Getting Variable", self.variable_key)
+        logging.info("Getting Variable %s", self.variable_key)
         variables = self.source_hook.get_variables()
         variable: Union[dict, None] = (
             [v for v in variables if v["key"] == self.variable_key] or [None]
         )[0]
         if variable is not None:
-            print("Migrating Variable", self.variable_key)
+            logging.info("Migrating Variable %s", self.variable_key)
             self.target_hook.set_variable(**variable)
         else:
             raise RuntimeError("Variable not found! " + self.variable_key)
@@ -90,13 +91,13 @@ def __init__(self, pool_name: Union[str, None] = None, **kwargs):
         self.pool_name = pool_name
 
     def execute(self, context: Context) -> Any:
-        print("Getting Pool", self.pool_name)
+        logging.info("Getting Pool %s", self.pool_name)
         pool: Union[dict, None] = (
             [v for v in self.source_hook.get_pools() if v["name"] == self.pool_name]
             or [None]
         )[0]
         if pool is not None:
-            print("Migrating Pool", self.pool_name)
+            logging.info("Migrating Pool %s", self.pool_name)
             self.target_hook.set_pool(**pool)
         else:
             raise RuntimeError("Pool not found!")
@@ -140,7 +141,7 @@ def __init__(self, connection_id: Union[str, None] = None, **kwargs):
         self.connection_id = connection_id
 
     def execute(self, context: Context) -> Any:
-        print("Getting Connection", self.connection_id)
+        logging.info("Getting Connection %s", self.connection_id)
         connection: Union[dict, None] = (
             [
                 v
@@ -150,7 +151,7 @@ def execute(self, context: Context) -> Any:
             or [None]
         )[0]
         if connection is not None:
-            print("Migrating Connection", self.connection_id)
+            logging.info("Migrating Connection %s", self.connection_id)
             self.target_hook.set_connection(**connection)
         else:
             raise RuntimeError("Connection not found!")
@@ -204,18 +205,18 @@ def __init__(
         self.dag_run_limit = dag_run_limit
 
     def execute(self, context):
-        print("Pausing local DAG for", self.target_dag_id)
+        logging.info("Pausing local DAG for %s", self.target_dag_id)
         self.source_hook.set_dag_is_paused(dag_id=self.target_dag_id, is_paused=True)
 
         # TODO - Poll until all tasks are done
 
-        print("Getting local DAG Runs for", self.target_dag_id)
+        logging.info("Getting local DAG Runs for %s", self.target_dag_id)
         dag_runs = self.source_hook.get_dag_runs(
             dag_id=self.target_dag_id, limit=self.dag_run_limit
         )
         if len(dag_runs["dag_runs"]) == 0:
             raise AirflowSkipException("No DAG Runs found for " + self.target_dag_id)
 
-        print("Getting local Task Instances for", self.target_dag_id)
+        logging.info("Getting local Task Instances for %s", self.target_dag_id)
         task_instances = self.source_hook.get_task_instances(
             dag_id=self.target_dag_id, limit=self.dag_run_limit
         )
@@ -224,16 +225,16 @@ def execute(self, context):
                 "No Task Instances found for " + self.target_dag_id
             )
 
-        print("Setting target DAG Runs for", self.target_dag_id)
+        logging.info("Setting target DAG Runs for %s", self.target_dag_id)
         self.target_hook.set_dag_runs(dag_runs=dag_runs["dag_runs"])
 
-        print("Setting target Task Instances for", self.target_dag_id)
+        logging.info("Setting target Task Instances for %s", self.target_dag_id)
         self.target_hook.set_task_instances(
             task_instances=task_instances["task_instances"]
         )
 
         if self.unpause_dag_in_target:
-            print("Unpausing target DAG for", self.target_dag_id)
+            logging.info("Unpausing target DAG for %s", self.target_dag_id)
             self.target_hook.set_dag_is_paused(
                 dag_id=self.target_dag_id, is_paused=False
             )

From 69a56eed56bf5b36ce43a28da05798815ce994a3 Mon Sep 17 00:00:00 2001
From:
fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:52:05 -0400 Subject: [PATCH 24/28] isolate queries to constants.js --- astronomer_starship/src/constants.js | 39 ++++++++++++++ astronomer_starship/src/pages/EnvVarsPage.jsx | 42 ++++----------- astronomer_starship/src/pages/SetupPage.jsx | 53 ++++++++----------- 3 files changed, 69 insertions(+), 65 deletions(-) diff --git a/astronomer_starship/src/constants.js b/astronomer_starship/src/constants.js index 873cba8..1a4a619 100644 --- a/astronomer_starship/src/constants.js +++ b/astronomer_starship/src/constants.js @@ -9,3 +9,42 @@ const constants = { TASK_INSTANCE_ROUTE: '/api/starship/task_instances', }; export default constants; + +export const updateDeploymentVariablesMutation = ` +mutation UpdateDeploymentVariables( + $deploymentUuid:Uuid!, + $releaseName:String!, + $environmentVariables: [InputEnvironmentVariable!]! +) { + updateDeploymentVariables( + deploymentUuid: $deploymentUuid, + releaseName: $releaseName, + environmentVariables: $environmentVariables + ) { + key + value + isSecret + } +}`; + +export const getDeploymentsQuery = `query deploymentVariables($deploymentUuid: Uuid!, $releaseName: String!) { + deploymentVariables( + deploymentUuid: $deploymentUuid + releaseName: $releaseName + ) { + key + value + isSecret + } +}`; + +export const getWorkspaceDeploymentsQuery = ` +query workspaces { + workspaces { + id + deployments { + id + releaseName + } + } +}`; diff --git a/astronomer_starship/src/pages/EnvVarsPage.jsx b/astronomer_starship/src/pages/EnvVarsPage.jsx index c943106..c10788e 100644 --- a/astronomer_starship/src/pages/EnvVarsPage.jsx +++ b/astronomer_starship/src/pages/EnvVarsPage.jsx @@ -1,8 +1,6 @@ import React, { useEffect, useState } from 'react'; import { createColumnHelper } from '@tanstack/react-table'; -import { - Text, Button, useToast, HStack, Spacer, -} from '@chakra-ui/react'; +import { Button, HStack, Spacer, Text, useToast, } from '@chakra-ui/react'; import PropTypes from 'prop-types'; import axios from 'axios'; import { MdErrorOutline } from 'react-icons/md'; @@ -11,39 +9,17 @@ import { GoUpload } from 'react-icons/go'; import { RepeatIcon } from '@chakra-ui/icons'; import StarshipPage from '../component/StarshipPage'; import { - fetchData, getAstroEnvVarRoute, getHoustonRoute, localRoute, proxyHeaders, proxyUrl, remoteRoute, + fetchData, + getAstroEnvVarRoute, + getHoustonRoute, + localRoute, + proxyHeaders, + proxyUrl, + remoteRoute, } from '../util'; -import constants from '../constants'; +import constants, { getDeploymentsQuery, updateDeploymentVariablesMutation } from '../constants'; import HiddenValue from "../component/HiddenValue.jsx"; -const getDeploymentsQuery = `query deploymentVariables($deploymentUuid: Uuid!, $releaseName: String!) { - deploymentVariables( - deploymentUuid: $deploymentUuid - releaseName: $releaseName - ) { - key - value - isSecret - } -}`; - -const updateDeploymentVariablesMutation = ` -mutation UpdateDeploymentVariables( - $deploymentUuid:Uuid!, - $releaseName:String!, - $environmentVariables: [InputEnvironmentVariable!]! 
-) { - updateDeploymentVariables( - deploymentUuid: $deploymentUuid, - releaseName: $releaseName, - environmentVariables: $environmentVariables - ) { - key - value - isSecret - } -}`; - function EnvVarMigrateButton({ route, headers, existsInRemote, sendData, isAstro, deploymentId, releaseName diff --git a/astronomer_starship/src/pages/SetupPage.jsx b/astronomer_starship/src/pages/SetupPage.jsx index 05834f4..dfab486 100644 --- a/astronomer_starship/src/pages/SetupPage.jsx +++ b/astronomer_starship/src/pages/SetupPage.jsx @@ -1,49 +1,34 @@ import { Box, + Button, Divider, - VStack, - Text, - InputGroup, - Input, - InputRightAddon, - InputLeftAddon, - FormLabel, + Fade, FormControl, - Switch, + FormErrorMessage, + FormHelperText, + FormLabel, HStack, + Input, + InputGroup, + InputLeftAddon, + InputRightAddon, + InputRightElement, Link, SlideFade, - Button, - Fade, - FormErrorMessage, - FormHelperText, - InputRightElement, useColorMode, Spacer, + Spacer, + Switch, + Text, + VStack, } from '@chakra-ui/react'; import React, { useEffect } from 'react'; import PropTypes from 'prop-types'; -import { - CheckIcon, ExternalLinkIcon, RepeatIcon, -} from '@chakra-ui/icons'; +import { CheckIcon, ExternalLinkIcon, RepeatIcon, } from '@chakra-ui/icons'; import { IoTelescopeOutline } from 'react-icons/io5'; import { NavLink } from 'react-router-dom'; import { getHoustonRoute, getTargetUrlFromParts, proxyHeaders, proxyUrl, tokenUrlFromAirflowUrl } from '../util'; import ValidatedUrlCheckbox from '../component/ValidatedUrlCheckbox'; import axios from "axios"; - -const workspaceDeploymentsQuery = { - operationName: "workspaces", - query: ` -query workspaces { - workspaces { - id - deployments { - id - releaseName - } - } -}`, - variables: {} -}; +import { getWorkspaceDeploymentsQuery } from "../constants.js"; export default function SetupPage({ state, dispatch }) { // Get the workspace ID & etc. 
if it's software and setup is completed @@ -56,7 +41,11 @@ export default function SetupPage({ state, dispatch }) { ){ axios.post( proxyUrl(getHoustonRoute(state.urlOrgPart)), - workspaceDeploymentsQuery, + { + operationName: "workspaces", + query: getWorkspaceDeploymentsQuery, + variables: {} + }, { headers: proxyHeaders(state.token) } From ce765669c89d0dff164abcfcddb779a9199a1ac6 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:54:41 -0400 Subject: [PATCH 25/28] remove console.log --- astronomer_starship/src/pages/TelescopePage.jsx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/astronomer_starship/src/pages/TelescopePage.jsx b/astronomer_starship/src/pages/TelescopePage.jsx index 401e22f..d3a8fdc 100644 --- a/astronomer_starship/src/pages/TelescopePage.jsx +++ b/astronomer_starship/src/pages/TelescopePage.jsx @@ -35,7 +35,6 @@ export default function TelescopePage({ state, dispatch }) { setRoute(_route); const _filename = `${state.telescopeOrganizationId}.${(new Date()).toISOString().slice(0,10)}.data.json` setFilename(_filename); - console.log(_route, _filename); }, [state]); return ( @@ -114,10 +113,7 @@ export default function TelescopePage({ state, dispatch }) { setError(err); }; axios.get(route) - .then((res) => { - console.log(res.data); - setIsUploadComplete(true); - }) + .then((res) => setIsUploadComplete(true)) .catch(errFn) .finally(() => { setIsUploading(false); From 66ae5b531a1c6bc0e3bdbe02e67a7492623341b7 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:59:48 -0400 Subject: [PATCH 26/28] move around python imports --- astronomer_starship/starship_api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/astronomer_starship/starship_api.py b/astronomer_starship/starship_api.py index 628c1f8..27fc5f2 100644 --- a/astronomer_starship/starship_api.py +++ b/astronomer_starship/starship_api.py @@ -7,11 +7,6 @@ from airflow.www.app import csrf from flask import Blueprint, request, jsonify from flask_appbuilder import expose, BaseView -import os -from typing import Any, Dict, List, Union -import base64 -import logging -from json import JSONDecodeError from astronomer_starship.compat.starship_compatability import ( StarshipCompatabilityLayer, @@ -21,11 +16,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Callable + from typing import Callable, Any, Dict, List, Union def get_json_or_clean_str(o: str) -> Union[List[Any], Dict[Any, Any], Any]: """For Aeroscope - Either load JSON (if we can) or strip and split the string, while logging the error""" + from json import JSONDecodeError + import logging + try: return json.loads(o) except (JSONDecodeError, TypeError) as e: @@ -44,6 +42,8 @@ def clean_airflow_report_output(log_string: str) -> Union[dict, str]: ... 
) {'output': 'hello world'} """ + from json import JSONDecodeError + import base64 log_lines = log_string.split("\n") enumerated_log_lines = list(enumerate(log_lines)) @@ -182,6 +182,7 @@ def telescope(self): from contextlib import redirect_stdout, redirect_stderr from urllib.error import HTTPError from datetime import datetime, timezone + import os aero_version = os.getenv("TELESCOPE_REPORT_RELEASE_VERSION", "latest") a = "airflow_report.pyz" From 1da202c318cb7107cd777e1b7a3a189be2077ed5 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 15:08:03 -0400 Subject: [PATCH 27/28] update operator doc install instructions --- docs/operator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/operator.md b/docs/operator.md index 8edc2d3..eadb458 100644 --- a/docs/operator.md +++ b/docs/operator.md @@ -15,7 +15,7 @@ to migrate data. Add the following line to your `requirements.txt` in your source environment: ``` - astronomer-starship + astronomer-starship[provider] ``` ## Setup From 61b6836e57c2a52dc6eb737cb2aa9d014015f3e9 Mon Sep 17 00:00:00 2001 From: fritz-astronomer <80706212+fritz-astronomer@users.noreply.github.com> Date: Fri, 7 Jun 2024 15:29:37 -0400 Subject: [PATCH 28/28] move back type hint imports --- astronomer_starship/starship_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astronomer_starship/starship_api.py b/astronomer_starship/starship_api.py index 27fc5f2..0d99cd5 100644 --- a/astronomer_starship/starship_api.py +++ b/astronomer_starship/starship_api.py @@ -13,10 +13,11 @@ get_kwargs_fn, ) +from typing import Any, Dict, List, Union from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Callable, Any, Dict, List, Union + from typing import Callable def get_json_or_clean_str(o: str) -> Union[List[Any], Dict[Any, Any], Any]:
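
Patches 26 and 28 above are two halves of one lesson: `Any`, `Dict`, `List`, and `Union` appear in annotations that Python evaluates when each `def` statement runs, so hiding them behind the `TYPE_CHECKING` guard makes the module raise `NameError` at import time. Only `Callable` — presumably referenced solely by static type checkers — can safely stay guarded. A minimal sketch of the failure mode, illustrative only and not code from the patches:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import List  # bound only for type checkers, never at runtime

# Without `from __future__ import annotations`, the return annotation below is
# evaluated as the `def` statement runs, so importing this module fails with:
#   NameError: name 'List' is not defined
def shout(words) -> List[str]:
    return [w.upper() for w in words]
```

Adding `from __future__ import annotations` (or quoting the annotation as `"List[str]"`) would defer evaluation and make the guarded import sufficient; moving the imports back to module level, as patch 28 does, is the other fix.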