Devices isolation process (#108)
IlyasMoutawwakil authored Jan 12, 2024
1 parent 0c9c300 · commit 3c75320
Showing 17 changed files with 253 additions and 277 deletions.
7 changes: 6 additions & 1 deletion README.md
@@ -36,6 +36,12 @@ Everything else is either optional or inferred from the model's name or path.
 - [x] Intel Neural Compressor backend for CPU
 - [x] OpenVINO backend for CPU
 
+### Launcher features
+
+- [x] Process isolation between consecutive runs (`launcher=process`)
+- [x] Device isolation assertion for NVIDIA & AMD GPUs (`launcher.device_isolation=true`)
+- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`, etc)
+
 ### Benchmark features
 
 - [x] Memory tracking (`benchmark.memory=true`)
@@ -58,7 +64,6 @@ Everything else is either optional or inferred from the model's name or path.
 - [x] BitsAndBytes quantization scheme (`backend.quantization_scheme=bnb`, `backend.quantization_config.load_in_4bit`, etc)
 - [x] GPTQ quantization scheme (`backend.quantization_scheme=gptq`, `backend.quantization_config.bits=4`, etc)
 - [x] PEFT training (`backend.peft_strategy=lora`, `backend.peft_config.task_type=CAUSAL_LM`, etc)
-- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`, etc)
 - [x] Transformers' Flash Attention V2 (`backend.use_flash_attention_v2=true`)
 - [x] Optimum's BetterTransformer (`backend.to_bettertransformer=true`)
 - [x] DeepSpeed-Inference support (`backend.deepspeed_inference=true`)
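For context, a minimal sketch of how the new launcher-level flag might be exercised from Python via Hydra's compose API. The `examples` config path and `pytorch_bert` config name come from the files changed in this commit; everything else below is an assumption, not part of the commit:

```python
# Hypothetical usage sketch, not part of this commit: compose the updated
# pytorch_bert example config and override the new launcher-level flag.
from hydra import compose, initialize

with initialize(version_base=None, config_path="examples"):
    cfg = compose(
        config_name="pytorch_bert",
        overrides=["launcher.device_isolation=true"],  # assert exclusive GPU access
    )
    # the launcher group now carries the isolation flag instead of the backend
    print(cfg.launcher.device_isolation)
```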
3 changes: 3 additions & 0 deletions examples/openvino_diffusion.yaml
@@ -11,6 +11,9 @@ experiment_name: openvino_diffusion
 model: stabilityai/stable-diffusion-2-1
 device: cpu
 
+launcher:
+  device_isolation: true
+
 backend:
   export: true
   reshape: true
5 changes: 4 additions & 1 deletion examples/pytorch_bert.yaml
@@ -1,6 +1,6 @@
 defaults:
   - backend: pytorch # default backend
-  - launcher: inline # default launcher
+  - launcher: torchrun # default launcher
   - benchmark: inference # default benchmark
   - experiment # inheriting experiment schema
   - _self_ # for hydra 1.1 compatibility
@@ -11,6 +11,9 @@ experiment_name: pytorch_bert
 model: bert-base-uncased
 device: cuda
 
+launcher:
+  device_isolation: true
+
 hydra:
   run:
     dir: runs/${experiment_name}
4 changes: 2 additions & 2 deletions examples/pytorch_llama.yaml
@@ -11,8 +11,8 @@ experiment_name: pytorch_llama
 model: TheBloke/Llama-2-70B-AWQ
 device: cuda
 
-backend:
-  continuous_isolation: false
+launcher:
+  device_isolation: true
 
 benchmark:
   input_shapes:
1 change: 0 additions & 1 deletion examples/running-llamas/configs/_base_.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   no_weights: true
   torch_dtype: float16
-  continuous_isolation: true
 
 benchmark:
   memory: true
1 change: 0 additions & 1 deletion examples/tgi_llama.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   sharded: false
   quantization_scheme: awq
-  continuous_isolation: false
   # no_weights: true # work in progress
 
 benchmark:
1 change: 0 additions & 1 deletion examples/training-llamas/configs/_base_.yaml
@@ -14,7 +14,6 @@ device: cuda
 backend:
   no_weights: true
   torch_dtype: float16
-  continuous_isolation: true
 
 benchmark:
   warmup_steps: 40
3 changes: 0 additions & 3 deletions examples/trt_llama.yaml
@@ -11,9 +11,6 @@ experiment_name: trt_llama
 model: NousResearch/Llama-2-7b-hf
 device: cuda
 
-backend:
-  continuous_isolation: false
-
 benchmark:
   input_shapes:
     batch_size: 1
43 changes: 5 additions & 38 deletions optimum_benchmark/backends/base.py
@@ -4,7 +4,6 @@
 import shutil
 from abc import ABC
 from logging import getLogger
-from multiprocessing import Process
 from typing import Any, Callable, ClassVar, Dict, Generic, Optional, Union
 
 import numpy as np
@@ -25,7 +24,6 @@
     get_model_class_for_task,
 )
 from .config import BackendConfigT
-from .isolation_utils import check_cuda_continuous_isolation
 from .utils import (
     PreTrainedProcessor,
     extract_shapes_from_diffusion_pipeline,
@@ -41,7 +39,6 @@ class Backend(Generic[BackendConfigT], ABC):
     library: str
     model_type: str
     config: BackendConfigT
-    isolation_thread: Optional[Process]
     pretrained_model: Union[PreTrainedModel, Pipeline]
     pretrained_config: Optional[PretrainedConfig]
     pretrained_processor: Optional[PreTrainedProcessor]
@@ -89,7 +86,8 @@ def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any
         self.pretrained_generation_config = None
 
         self.automodel_class = get_model_class_for_task(
-            framework="pt",  # TODO: make this configurable to add support for other frameworks
+            # TODO: make this configurable to add support for other frameworks
+            framework="pt",
             task=self.task,
             library=self.library,
             model_type=self.model_type,
@@ -105,34 +103,17 @@ def configure(self, config: BackendConfigT) -> None:
         LOGGER.info(f"Configuring {self.NAME} backend")
         self.config = config
 
-        # isolation options
-        if self.config.continuous_isolation:
-            LOGGER.info("\t+ Running continuous isolation check")
-            self.check_continuous_isolation()
-
         # clean up options
         if self.config.delete_cache:
             LOGGER.info("\t+ Model cache will be deleted after benchmark")
 
-    def check_continuous_isolation(self) -> None:
-        if self.device == "cuda":
-            self.isolation_process = Process(
-                target=check_cuda_continuous_isolation,
-                kwargs={
-                    "isolated_pid": os.getpid(),
-                    "isolation_check_interval": self.config.isolation_check_interval,
-                },
-                daemon=True,
-            )
-            self.isolation_process.start()
-            LOGGER.info(f"\t+ Started isolation process with PID {self.isolation_process.pid}")
-        else:
-            raise ValueError("Continuous isolation is only supported for CUDA devices")
-
     def seed(self) -> None:
         random.seed(self.config.seed)
         np.random.seed(self.config.seed)
 
+    def prepare_for_inference(self, **kwargs) -> None:
+        pass
+
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: move this to only backends that need it (non cpu backends)
         if self.is_diffusion_pipeline():
@@ -144,9 +125,6 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
 
         return inputs
 
-    def prepare_for_inference(self, **kwargs) -> None:
-        pass
-
     def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
         return self.pretrained_model(**input, **kwargs)
 
@@ -181,22 +159,11 @@ def delete_hf_model_cache(self) -> None:
         model_cache_path = os.path.join(os.path.expanduser("~/.cache/huggingface/hub"), model_cache_folder)
         shutil.rmtree(model_cache_path, ignore_errors=True)
 
-    def terminate_isolation_process(self) -> None:
-        LOGGER.info("\t+ Terminating isolation process")
-        self.isolation_process.kill()
-        self.isolation_process.join()
-        self.isolation_process.close()
-
     def clean(self) -> None:
         LOGGER.info(f"Cleaning {self.NAME} backend")
 
-        if self.config.continuous_isolation:
-            self.terminate_isolation_process()
-
         if hasattr(self, "pretrained_model"):
             self.delete_pretrained_model()
 
         if self.config.delete_cache:
             self.delete_hf_model_cache()
 
         gc.collect()
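For readers tracking what moved: the deleted backend code followed the lifecycle pattern sketched below, a daemon process polling for device isolation, which this commit relocates from the backend to the launcher. The `Process` wiring mirrors the deleted lines above; the body of the check function is an illustrative assumption, not the deleted implementation:

```python
# Sketch of the removed continuous-isolation pattern (wiring mirrors the
# deleted code above; the check body is assumed, not from this commit).
import os
import time
from multiprocessing import Process


def check_cuda_continuous_isolation(isolated_pid: int, isolation_check_interval: float) -> None:
    # Poll forever; a real check would compare the PIDs using the CUDA
    # devices against isolated_pid and fail on any foreign PID.
    while True:
        time.sleep(isolation_check_interval)


isolation_process = Process(
    target=check_cuda_continuous_isolation,
    kwargs={"isolated_pid": os.getpid(), "isolation_check_interval": 1.0},
    daemon=True,
)
isolation_process.start()
# ... run the benchmark ...
isolation_process.kill()
isolation_process.join()
isolation_process.close()
```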
7 changes: 0 additions & 7 deletions optimum_benchmark/backends/config.py
@@ -19,10 +19,6 @@ class BackendConfig(ABC):
     inter_op_num_threads: Optional[int] = None
     intra_op_num_threads: Optional[int] = None
 
-    # device isolation options
-    continuous_isolation: bool = True
-    isolation_check_interval: Optional[float] = None
-
     # clean up options
     delete_cache: bool = False
 
@@ -35,8 +31,5 @@ def __post_init__(self):
         if self.intra_op_num_threads == -1:
             self.intra_op_num_threads = cpu_count()
 
-        if self.continuous_isolation and self.isolation_check_interval is None:
-            self.isolation_check_interval = 1
-
 
 BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
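After this change, the backend config keeps only threading and cache options. A consolidated sketch of the resulting dataclass, with field names taken from the diff context above and the surrounding details (including the `inter_op_num_threads` branch) assumed:

```python
# Consolidated view of BackendConfig after this commit (sketch; only the
# fields visible in the diff context above are certain).
from abc import ABC
from dataclasses import dataclass
from multiprocessing import cpu_count
from typing import Optional


@dataclass
class BackendConfig(ABC):
    inter_op_num_threads: Optional[int] = None
    intra_op_num_threads: Optional[int] = None

    # clean up options
    delete_cache: bool = False

    def __post_init__(self):
        # -1 acts as a sentinel for "use every available core"
        if self.inter_op_num_threads == -1:
            self.inter_op_num_threads = cpu_count()
        if self.intra_op_num_threads == -1:
            self.intra_op_num_threads = cpu_count()
```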
176 changes: 0 additions & 176 deletions optimum_benchmark/backends/isolation_utils.py

This file was deleted.
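The deleted module's contents are not shown in this view. For orientation only, a device-level isolation check of the kind it likely implemented might look like the sketch below, which uses pynvml to list the PIDs holding each GPU; everything here is an assumption apart from the module's name and purpose:

```python
# Assumed sketch of a CUDA device-isolation check (the deleted file's actual
# code is not shown here): flag any process other than the benchmarked one
# holding a compute context on the visible GPUs.
import pynvml


def assert_cuda_isolation(isolated_pid: int) -> None:
    pynvml.nvmlInit()
    try:
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if proc.pid != isolated_pid:
                    raise RuntimeError(f"Foreign process {proc.pid} is using device {index}")
    finally:
        pynvml.nvmlShutdown()
```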
