Devices isolation process (#108)
IlyasMoutawwakil authored Jan 12, 2024
1 parent 0c9c300 · commit 3c75320
Showing 17 changed files with 253 additions and 277 deletions.
7 changes: 6 additions & 1 deletion README.md
@@ -36,6 +36,12 @@ Everything else is either optional or inferred from the model's name or path.
 - [x] Intel Neural Compressor backend for CPU
 - [x] OpenVINO backend for CPU
 
+### Launcher features
+
+- [x] Process isolation between consecutive runs (`launcher=process`)
+- [x] Device isolation assertion for NVIDIA & AMD GPUs (`launcher.device_isolation=true`)
+- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`, etc)
+
 ### Benchmark features
 
 - [x] Memory tracking (`benchmark.memory=true`)
@@ -58,7 +64,6 @@ Everything else is either optional or inferred from the model's name or path.
 - [x] BitsAndBytes quantization scheme (`backend.quantization_scheme=bnb`, `backend.quantization_config.load_in_4bit`, etc)
 - [x] GPTQ quantization scheme (`backend.quantization_scheme=gptq`, `backend.quantization_config.bits=4`, etc)
 - [x] PEFT training (`backend.peft_strategy=lora`, `backend.peft_config.task_type=CAUSAL_LM`, etc)
-- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`, etc)
 - [x] Transformers' Flash Attention V2 (`backend.use_flash_attention_v2=true`)
 - [x] Optimum's BetterTransformer (`backend.to_bettertransformer=true`)
 - [x] DeepSpeed-Inference support (`backend.deepspeed_inference=true`)
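For context, a minimal sketch of how the new launcher-level flag might be exercised from Python via Hydra's compose API. The `examples` config path and `pytorch_bert` config name come from the files changed in this commit; everything else below is an assumption, not part of the commit:

```python
# Hypothetical usage sketch, not part of this commit: compose the updated
# pytorch_bert example config and override the new launcher-level flag.
from hydra import compose, initialize

with initialize(version_base=None, config_path="examples"):
    cfg = compose(
        config_name="pytorch_bert",
        overrides=["launcher.device_isolation=true"],  # assert exclusive GPU access
    )
    # the launcher group now carries the isolation flag instead of the backend
    print(cfg.launcher.device_isolation)
```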
3 changes: 3 additions & 0 deletions examples/openvino_diffusion.yaml
@@ -11,6 +11,9 @@ experiment_name: openvino_diffusion
 model: stabilityai/stable-diffusion-2-1
 device: cpu
 
+launcher:
+  device_isolation: true
+
 backend:
   export: true
   reshape: true
5 changes: 4 additions & 1 deletion examples/pytorch_bert.yaml
@@ -1,6 +1,6 @@
 defaults:
   - backend: pytorch # default backend
-  - launcher: inline # default launcher
+  - launcher: torchrun # default launcher
   - benchmark: inference # default benchmark
   - experiment # inheriting experiment schema
   - _self_ # for hydra 1.1 compatibility
@@ -11,6 +11,9 @@ experiment_name: pytorch_bert
 model: bert-base-uncased
 device: cuda
 
+launcher:
+  device_isolation: true
+
 hydra:
   run:
     dir: runs/${experiment_name}
4 changes: 2 additions & 2 deletions examples/pytorch_llama.yaml
@@ -11,8 +11,8 @@ experiment_name: pytorch_llama
 model: TheBloke/Llama-2-70B-AWQ
 device: cuda
 
-backend:
-  continuous_isolation: false
+launcher:
+  device_isolation: true
 
 benchmark:
   input_shapes:
1 change: 0 additions & 1 deletion examples/running-llamas/configs/_base_.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   no_weights: true
   torch_dtype: float16
-  continuous_isolation: true
 
 benchmark:
   memory: true
1 change: 0 additions & 1 deletion examples/tgi_llama.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   sharded: false
   quantization_scheme: awq
-  continuous_isolation: false
   # no_weights: true # work in progress
 
 benchmark:
1 change: 0 additions & 1 deletion examples/training-llamas/configs/_base_.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   no_weights: true
   torch_dtype: float16
-  continuous_isolation: true
 
 benchmark:
   warmup_steps: 40
3 changes: 0 additions & 3 deletions examples/trt_llama.yaml
@@ -11,9 +11,6 @@ experiment_name: trt_llama
 model: NousResearch/Llama-2-7b-hf
 device: cuda
 
-backend:
-  continuous_isolation: false
-
 benchmark:
   input_shapes:
     batch_size: 1
43 changes: 5 additions & 38 deletions optimum_benchmark/backends/base.py
@@ -4,7 +4,6 @@
 import shutil
 from abc import ABC
 from logging import getLogger
-from multiprocessing import Process
 from typing import Any, Callable, ClassVar, Dict, Generic, Optional, Union
 
 import numpy as np
@@ -25,7 +24,6 @@
     get_model_class_for_task,
 )
 from .config import BackendConfigT
-from .isolation_utils import check_cuda_continuous_isolation
 from .utils import (
     PreTrainedProcessor,
     extract_shapes_from_diffusion_pipeline,
@@ -41,7 +39,6 @@ class Backend(Generic[BackendConfigT], ABC):
     library: str
     model_type: str
     config: BackendConfigT
-    isolation_thread: Optional[Process]
     pretrained_model: Union[PreTrainedModel, Pipeline]
     pretrained_config: Optional[PretrainedConfig]
     pretrained_processor: Optional[PreTrainedProcessor]
@@ -89,7 +86,8 @@ def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any
         self.pretrained_generation_config = None
 
         self.automodel_class = get_model_class_for_task(
-            framework="pt",  # TODO: make this configurable to add support for other frameworks
+            # TODO: make this configurable to add support for other frameworks
+            framework="pt",
             task=self.task,
             library=self.library,
             model_type=self.model_type,
@@ -105,34 +103,17 @@ def configure(self, config: BackendConfigT) -> None:
         LOGGER.info(f"Configuring {self.NAME} backend")
         self.config = config
 
-        # isolation options
-        if self.config.continuous_isolation:
-            LOGGER.info("\t+ Running continuous isolation check")
-            self.check_continuous_isolation()
-
         # clean up options
         if self.config.delete_cache:
             LOGGER.info("\t+ Model cache will be deleted after benchmark")
 
-    def check_continuous_isolation(self) -> None:
-        if self.device == "cuda":
-            self.isolation_process = Process(
-                target=check_cuda_continuous_isolation,
-                kwargs={
-                    "isolated_pid": os.getpid(),
-                    "isolation_check_interval": self.config.isolation_check_interval,
-                },
-                daemon=True,
-            )
-            self.isolation_process.start()
-            LOGGER.info(f"\t+ Started isolation process with PID {self.isolation_process.pid}")
-        else:
-            raise ValueError("Continuous isolation is only supported for CUDA devices")
-
     def seed(self) -> None:
         random.seed(self.config.seed)
         np.random.seed(self.config.seed)
 
+    def prepare_for_inference(self, **kwargs) -> None:
+        pass
+
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: move this to only backends that need it (non cpu backends)
         if self.is_diffusion_pipeline():
@@ -144,9 +125,6 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
 
         return inputs
 
-    def prepare_for_inference(self, **kwargs) -> None:
-        pass
-
     def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
         return self.pretrained_model(**input, **kwargs)
 
@@ -181,22 +159,11 @@ def delete_hf_model_cache(self) -> None:
         model_cache_path = os.path.join(os.path.expanduser("~/.cache/huggingface/hub"), model_cache_folder)
         shutil.rmtree(model_cache_path, ignore_errors=True)
 
-    def terminate_isolation_process(self) -> None:
-        LOGGER.info("\t+ Terminating isolation process")
-        self.isolation_process.kill()
-        self.isolation_process.join()
-        self.isolation_process.close()
-
     def clean(self) -> None:
         LOGGER.info(f"Cleaning {self.NAME} backend")
 
-        if self.config.continuous_isolation:
-            self.terminate_isolation_process()
-
         if hasattr(self, "pretrained_model"):
             self.delete_pretrained_model()
 
         if self.config.delete_cache:
             self.delete_hf_model_cache()
 
         gc.collect()
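For readers tracking what moved: the deleted backend code followed the lifecycle pattern sketched below, a daemon process polling for device isolation, which this commit relocates from the backend to the launcher. The `Process` wiring mirrors the deleted lines above; the body of the check function is an illustrative assumption, not the deleted implementation:

```python
# Sketch of the removed continuous-isolation pattern (wiring mirrors the
# deleted code above; the check body is assumed, not from this commit).
import os
import time
from multiprocessing import Process


def check_cuda_continuous_isolation(isolated_pid: int, isolation_check_interval: float) -> None:
    # Poll forever; a real check would compare the PIDs using the CUDA
    # devices against isolated_pid and fail on any foreign PID.
    while True:
        time.sleep(isolation_check_interval)


isolation_process = Process(
    target=check_cuda_continuous_isolation,
    kwargs={"isolated_pid": os.getpid(), "isolation_check_interval": 1.0},
    daemon=True,
)
isolation_process.start()
# ... run the benchmark ...
isolation_process.kill()
isolation_process.join()
isolation_process.close()
```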
7 changes: 0 additions & 7 deletions optimum_benchmark/backends/config.py
@@ -19,10 +19,6 @@ class BackendConfig(ABC):
     inter_op_num_threads: Optional[int] = None
     intra_op_num_threads: Optional[int] = None
 
-    # device isolation options
-    continuous_isolation: bool = True
-    isolation_check_interval: Optional[float] = None
-
     # clean up options
     delete_cache: bool = False
 
@@ -35,8 +31,5 @@ def __post_init__(self):
         if self.intra_op_num_threads == -1:
             self.intra_op_num_threads = cpu_count()
 
-        if self.continuous_isolation and self.isolation_check_interval is None:
-            self.isolation_check_interval = 1
-
 
 BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
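After this change, the backend config keeps only threading and cache options. A consolidated sketch of the resulting dataclass, with field names taken from the diff context above and the surrounding details (including the `inter_op_num_threads` branch) assumed:

```python
# Consolidated view of BackendConfig after this commit (sketch; only the
# fields visible in the diff context above are certain).
from abc import ABC
from dataclasses import dataclass
from multiprocessing import cpu_count
from typing import Optional


@dataclass
class BackendConfig(ABC):
    inter_op_num_threads: Optional[int] = None
    intra_op_num_threads: Optional[int] = None

    # clean up options
    delete_cache: bool = False

    def __post_init__(self):
        # -1 acts as a sentinel for "use every available core"
        if self.inter_op_num_threads == -1:
            self.inter_op_num_threads = cpu_count()
        if self.intra_op_num_threads == -1:
            self.intra_op_num_threads = cpu_count()
```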
176 changes: 0 additions & 176 deletions optimum_benchmark/backends/isolation_utils.py

This file was deleted.
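The deleted module's contents are not shown in this view. For orientation only, a device-level isolation check of the kind it likely implemented might look like the sketch below, which uses pynvml to list the PIDs holding each GPU; everything here is an assumption apart from the module's name and purpose:

```python
# Assumed sketch of a CUDA device-isolation check (the deleted file's actual
# code is not shown here): flag any process other than the benchmarked one
# holding a compute context on the visible GPUs.
import pynvml


def assert_cuda_isolation(isolated_pid: int) -> None:
    pynvml.nvmlInit()
    try:
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if proc.pid != isolated_pid:
                    raise RuntimeError(f"Foreign process {proc.pid} is using device {index}")
    finally:
        pynvml.nvmlShutdown()
```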
