-
Notifications
You must be signed in to change notification settings - Fork 308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FEAT] add driver/executor pod in Spark #3016
base: master
Are you sure you want to change the base?
Changes from 2 commits
8cc081d
7793398
b21d1e3
00d0c3a
1b7c1c9
32f6aa9
167a390
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -10,9 +10,12 @@ | |||||||||
from flytekit import FlyteContextManager, PythonFunctionTask, lazy_module, logger | ||||||||||
from flytekit.configuration import DefaultImages, SerializationSettings | ||||||||||
from flytekit.core.context_manager import ExecutionParameters | ||||||||||
from flytekit.core.utils import _get_container_definition, _serialize_pod_spec, timeit | ||||||||||
from flytekit.core.pod_template import PodTemplate | ||||||||||
from flytekit.extend import ExecutionState, TaskPlugins | ||||||||||
from flytekit.extend.backend.base_agent import AsyncAgentExecutorMixin | ||||||||||
from flytekit.image_spec import ImageSpec | ||||||||||
from flytekit.models.task import K8sPod, K8sObjectMetadata | ||||||||||
|
||||||||||
from .models import SparkJob, SparkType | ||||||||||
|
||||||||||
|
@@ -31,12 +34,16 @@ class Spark(object): | |||||||||
hadoop_conf: Dictionary of hadoop conf. The variables should match a typical hadoop configuration for spark | ||||||||||
executor_path: Python binary executable to use for PySpark in driver and executor. | ||||||||||
applications_path: MainFile is the path to a bundled JAR, Python, or R file of the application to execute. | ||||||||||
driver_pod: K8sPod for Spark driver pod | ||||||||||
executor_pod: K8sPod for Spark executor pod | ||||||||||
""" | ||||||||||
|
||||||||||
spark_conf: Optional[Dict[str, str]] = None | ||||||||||
hadoop_conf: Optional[Dict[str, str]] = None | ||||||||||
executor_path: Optional[str] = None | ||||||||||
applications_path: Optional[str] = None | ||||||||||
driver_pod: Optional[PodTemplate] = None | ||||||||||
executor_pod: Optional[PodTemplate] = None | ||||||||||
|
||||||||||
def __post_init__(self): | ||||||||||
if self.spark_conf is None: | ||||||||||
|
@@ -168,6 +175,8 @@ def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]: | |||||||||
executor_path=self._default_executor_path or settings.python_interpreter, | ||||||||||
main_class="", | ||||||||||
spark_type=SparkType.PYTHON, | ||||||||||
driver_pod=self.to_k8s_pod(self.task_config.driver_pod, settings), | ||||||||||
executor_pod=self.to_k8s_pod(self.task_config.executor_pod, settings), | ||||||||||
) | ||||||||||
if isinstance(self.task_config, (Databricks, DatabricksV2)): | ||||||||||
cfg = cast(DatabricksV2, self.task_config) | ||||||||||
|
@@ -176,6 +185,22 @@ def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]: | |||||||||
|
||||||||||
return MessageToDict(job.to_flyte_idl()) | ||||||||||
|
||||||||||
def to_k8s_pod(self, pod_template: PodTemplate | None, settings: SerializationSettings) -> K8sPod | None: | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Consider adding return type hints
Consider adding type hints for the return value of `to_k8s_pod`.
Code suggestion: Check the AI-generated fix before applying.
Suggested change
Code Review Run #3c7587. Is this a valid issue, or was it incorrectly flagged by the Agent?
|
||||||||||
""" | ||||||||||
Convert the podTemplate to K8sPod | ||||||||||
""" | ||||||||||
if pod_template is None: | ||||||||||
return None | ||||||||||
|
||||||||||
return K8sPod( | ||||||||||
pod_spec=_serialize_pod_spec(pod_template, self._get_container(settings), settings), | ||||||||||
metadata=K8sObjectMetadata( | ||||||||||
labels=pod_template.labels, | ||||||||||
annotations=pod_template.annotations, | ||||||||||
), | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
def pre_execute(self, user_params: ExecutionParameters) -> ExecutionParameters: | ||||||||||
import pyspark as _pyspark | ||||||||||
|
||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider adding `driver_pod` and `executor_pod` to the `with_overrides` method to ensure consistent pod configuration overrides.
Code suggestion
Code Review Run #3c7587
Is this a valid issue, or was it incorrectly flagged by the Agent?