Skip to content

Commit

Permalink
Support setting direct_running_mode for LocalDirectRunner
Browse files Browse the repository at this point in the history
This also changes the default local one from multi_processing
to multi_threading. The primary use here is to make the logging
config from the *runner* be inherited by the *workers* too, which
helps with debugging quite a bit.
  • Loading branch information
yuvipanda authored and moradology committed Feb 13, 2024
1 parent 0eda0ca commit ff36901
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions pangeo_forge_runner/bakery/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from apache_beam.pipeline import PipelineOptions
from traitlets import Integer
from traitlets import Integer, Unicode

from .base import Bakery

Expand All @@ -29,6 +29,24 @@ class LocalDirectBakery(Bakery):
""",
)

direct_running_mode = Unicode(
"multi_threading",
config=True,
help="""
One of 'in_memory', 'multi_threading', 'multi_processing'.
in_memory: Runner and workers’ communication happens in memory (not through gRPC). This is a default mode.
multi_threading: Runner and workers communicate through gRPC and each worker runs in a thread.
multi_processing: Runner and workers communicate through gRPC and each worker runs in a subprocess.
multi_processing is closest to most production runners, as it enables real usage of multiple
CPUs on the host machine. **However**, it can mess up logging, so is not the default here.
https://beam.apache.org/documentation/runners/direct/#setting-parallelism has more
information.
""",
)

def get_pipeline_options(
self, job_name: str, container_image: str, extra_options: dict
) -> PipelineOptions:
Expand All @@ -40,7 +58,7 @@ def get_pipeline_options(
return PipelineOptions(
flags=[],
runner="DirectRunner",
direct_running_mode="multi_processing",
direct_running_mode=self.direct_running_mode,
direct_num_workers=self.num_workers,
save_main_session=True,
# this might solve serialization issues; cf. https://beam.apache.org/blog/beam-2.36.0/
Expand Down

0 comments on commit ff36901

Please sign in to comment.