-
Notifications
You must be signed in to change notification settings - Fork 444
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move XPU/Opt related PRs from releases2.0 to develop (#3295)
* Enable training on XPU devices in OTX2.0 (#3094) * add raising an error when metric is None * added accelerators * fix packages * fix assigning model * debug on MAX * change precision * update MixedPrecisionXPUPlugin * debug * added monkey patching * minor * minor * added patch for mmengine * fix OD and IS * benchmark debug * change device * quick fix for instance seg * fix pre-commit * fix pre-commit * clean the code * added additional flag for mmcv * added unit tests * fixed unit test * fix linter * added unit tests and replied comments * fix pre-commit * minor fix * added documentation * fix unit test * add workaround for semantic segmentation * remove RoiAlignTest due to unstability * minor * remove strategy back * try to patch SingleDeviceStrategy * added auto xpu configuration * patch strategy * small fix * reply to comments * move patching xpu packages to accelerator * fix test_xpu test * remove do-not-install-mmcv * fix pre-commit * remove torch.xpu.optimize for segmentation --------- Co-authored-by: Emily <[email protected]> * Add exporter/demo unit tests (#3218) * added unit tests. Need to clean up * move tests * fix pre-commit * return demo back * minor * delete unnecessery comments * fix unit test * fix pre-commit * fix pre-commit 2 * fix test_postprocess_openvino_model * fix unit tests * test_precommit * Fix a bug that engine.test doesn't work with XPU (#3293) * fix bug * align with pre-commit --------- Co-authored-by: Emily <[email protected]> * fix merge conflicts for pre-commit * fix precommit 2 * fix unit test * fix pre-commit * fix export tests * fix pre-commit * fix tox * fix pre-commit --------- Co-authored-by: Emily <[email protected]> Co-authored-by: Eunwoo Shin <[email protected]>
- Loading branch information
1 parent
dd4ae39
commit 255be7a
Showing
31 changed files
with
1,355 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
"""Lightning accelerator for XPU device.""" | ||
|
||
from .xpu import XPUAccelerator | ||
|
||
__all__ = ["XPUAccelerator"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
"""Lightning accelerator for XPU device.""" | ||
# Copyright (C) 2023 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
from __future__ import annotations | ||
|
||
from typing import Any, Union | ||
|
||
import numpy as np | ||
import torch | ||
from lightning.pytorch.accelerators import AcceleratorRegistry | ||
from lightning.pytorch.accelerators.accelerator import Accelerator | ||
from mmcv.ops.nms import NMSop | ||
from mmcv.ops.roi_align import RoIAlign | ||
from mmengine.structures import instance_data | ||
|
||
from otx.algo.detection.utils import monkey_patched_nms, monkey_patched_roi_align | ||
from otx.utils.utils import is_xpu_available | ||
|
||
|
||
class XPUAccelerator(Accelerator): | ||
"""Support for a XPU, optimized for large-scale machine learning.""" | ||
|
||
accelerator_name = "xpu" | ||
|
||
def setup_device(self, device: torch.device) -> None: | ||
"""Sets up the specified device.""" | ||
if device.type != "xpu": | ||
msg = f"Device should be xpu, got {device} instead" | ||
raise RuntimeError(msg) | ||
|
||
torch.xpu.set_device(device) | ||
self.patch_packages_xpu() | ||
|
||
@staticmethod | ||
def parse_devices(devices: str | list | torch.device) -> list: | ||
"""Parses devices for multi-GPU training.""" | ||
if isinstance(devices, list): | ||
return devices | ||
return [devices] | ||
|
||
@staticmethod | ||
def get_parallel_devices(devices: list) -> list[torch.device]: | ||
"""Generates a list of parrallel devices.""" | ||
return [torch.device("xpu", idx) for idx in devices] | ||
|
||
@staticmethod | ||
def auto_device_count() -> int: | ||
"""Returns number of XPU devices available.""" | ||
return torch.xpu.device_count() | ||
|
||
@staticmethod | ||
def is_available() -> bool: | ||
"""Checks if XPU available.""" | ||
return is_xpu_available() | ||
|
||
def get_device_stats(self, device: str | torch.device) -> dict[str, Any]: | ||
"""Returns XPU devices stats.""" | ||
return {} | ||
|
||
def teardown(self) -> None: | ||
"""Cleans-up XPU-related resources.""" | ||
self.revert_packages_xpu() | ||
|
||
def patch_packages_xpu(self) -> None: | ||
"""Patch packages when xpu is available.""" | ||
# patch instance_data from mmengie | ||
long_type_tensor = Union[torch.LongTensor, torch.xpu.LongTensor] | ||
bool_type_tensor = Union[torch.BoolTensor, torch.xpu.BoolTensor] | ||
instance_data.IndexType = Union[str, slice, int, list, long_type_tensor, bool_type_tensor, np.ndarray] | ||
|
||
# patch nms and roi_align | ||
self._nms_op_forward = NMSop.forward | ||
self._roi_align_forward = RoIAlign.forward | ||
NMSop.forward = monkey_patched_nms | ||
RoIAlign.forward = monkey_patched_roi_align | ||
|
||
def revert_packages_xpu(self) -> None: | ||
"""Revert packages when xpu is available.""" | ||
NMSop.forward = self._nms_op_forward | ||
RoIAlign.forward = self._roi_align_forward | ||
|
||
|
||
AcceleratorRegistry.register( | ||
XPUAccelerator.accelerator_name, | ||
XPUAccelerator, | ||
description="Accelerator supports XPU devices", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
"""Data structures for detection task.""" | ||
# | ||
"""utils for detection task.""" | ||
|
||
from .mmcv_patched_ops import monkey_patched_nms, monkey_patched_roi_align | ||
|
||
__all__ = ["monkey_patched_nms", "monkey_patched_roi_align"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
"""utils for detection task.""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
import torch | ||
from mmcv.utils import ext_loader | ||
from torchvision.ops import nms as tv_nms | ||
from torchvision.ops import roi_align as tv_roi_align | ||
|
||
if TYPE_CHECKING: | ||
from mmcv.ops.nms import NMSop | ||
from mmcv.ops.roi_align import RoIAlign | ||
|
||
ext_module = ext_loader.load_ext("_ext", ["nms", "softnms", "nms_match", "nms_rotated", "nms_quadri"]) | ||
|
||
|
||
def monkey_patched_nms( | ||
ctx: NMSop, | ||
bboxes: torch.Tensor, | ||
scores: torch.Tensor, | ||
iou_threshold: float, | ||
offset: float, | ||
score_threshold: float, | ||
max_num: int, | ||
) -> torch.Tensor: | ||
"""Runs MMCVs NMS with torchvision.nms, or forces NMS from MMCV to run on CPU.""" | ||
_ = ctx | ||
is_filtering_by_score = score_threshold > 0 | ||
if is_filtering_by_score: | ||
valid_mask = scores > score_threshold | ||
bboxes, scores = bboxes[valid_mask], scores[valid_mask] | ||
valid_inds = torch.nonzero(valid_mask, as_tuple=False).squeeze(dim=1) | ||
|
||
if bboxes.dtype == torch.bfloat16: | ||
bboxes = bboxes.to(torch.float32) | ||
if scores.dtype == torch.bfloat16: | ||
scores = scores.to(torch.float32) | ||
|
||
if offset == 0: | ||
inds = tv_nms(bboxes, scores, float(iou_threshold)) | ||
else: | ||
device = bboxes.device | ||
bboxes = bboxes.to("cpu") | ||
scores = scores.to("cpu") | ||
inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) | ||
bboxes = bboxes.to(device) | ||
scores = scores.to(device) | ||
|
||
if max_num > 0: | ||
inds = inds[:max_num] | ||
if is_filtering_by_score: | ||
inds = valid_inds[inds] | ||
return inds | ||
|
||
|
||
def monkey_patched_roi_align(self: RoIAlign, _input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: | ||
"""Replaces MMCVs roi align with the one from torchvision. | ||
Args: | ||
self: patched instance | ||
_input: NCHW images | ||
rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. | ||
""" | ||
if "aligned" in tv_roi_align.__code__.co_varnames: | ||
return tv_roi_align(_input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) | ||
if self.aligned: | ||
rois -= rois.new_tensor([0.0] + [0.5 / self.spatial_scale] * 4) | ||
return tv_roi_align(_input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
"""Plugin for mixed-precision training on XPU.""" | ||
|
||
from .xpu_precision import MixedPrecisionXPUPlugin | ||
|
||
__all__ = ["MixedPrecisionXPUPlugin"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
"""Plugin for mixed-precision training on XPU.""" | ||
|
||
from __future__ import annotations | ||
|
||
from contextlib import contextmanager | ||
from typing import TYPE_CHECKING, Any, Callable, Generator | ||
|
||
import torch | ||
from lightning.pytorch.plugins.precision.precision import Precision | ||
from lightning.pytorch.utilities import GradClipAlgorithmType | ||
from lightning.pytorch.utilities.exceptions import MisconfigurationException | ||
from torch import Tensor | ||
from torch.optim import LBFGS, Optimizer | ||
|
||
if TYPE_CHECKING: | ||
import lightning.pytorch as pl | ||
from lightning_fabric.utilities.types import Optimizable | ||
|
||
|
||
class MixedPrecisionXPUPlugin(Precision): | ||
"""Plugin for Automatic Mixed Precision (AMP) training with ``torch.xpu.autocast``. | ||
Args: | ||
scaler: An optional :class:`torch.cuda.amp.GradScaler` to use. | ||
""" | ||
|
||
def __init__(self, scaler: torch.cuda.amp.GradScaler | None = None) -> None: | ||
self.scaler = scaler | ||
|
||
def pre_backward(self, tensor: Tensor, module: pl.LightningModule) -> Tensor: | ||
"""Apply grad scaler before backward.""" | ||
if self.scaler is not None: | ||
tensor = self.scaler.scale(tensor) | ||
return super().pre_backward(tensor, module) | ||
|
||
def optimizer_step( # type: ignore[override] | ||
self, | ||
optimizer: Optimizable, | ||
model: pl.LightningModule, | ||
closure: Callable, | ||
**kwargs: dict, | ||
) -> None | dict: | ||
"""Make an optimizer step using scaler if it was passed.""" | ||
if self.scaler is None: | ||
# skip scaler logic, as bfloat16 does not require scaler | ||
return super().optimizer_step( | ||
optimizer, | ||
model=model, | ||
closure=closure, | ||
**kwargs, | ||
) | ||
if isinstance(optimizer, LBFGS): | ||
msg = "Native AMP and the LBFGS optimizer are not compatible." | ||
raise MisconfigurationException( | ||
msg, | ||
) | ||
closure_result = closure() | ||
|
||
if not _optimizer_handles_unscaling(optimizer): | ||
# Unscaling needs to be performed here in case we are going to apply gradient clipping. | ||
# Optimizers that perform unscaling in their `.step()` method are not supported (e.g., fused Adam). | ||
# Note: `unscale` happens after the closure is executed, but before the `on_before_optimizer_step` hook. | ||
self.scaler.unscale_(optimizer) | ||
|
||
self._after_closure(model, optimizer) | ||
skipped_backward = closure_result is None | ||
# in manual optimization, the closure does not return a value | ||
if not model.automatic_optimization or not skipped_backward: | ||
# note: the scaler will skip the `optimizer.step` if nonfinite gradients are found | ||
step_output = self.scaler.step(optimizer, **kwargs) | ||
self.scaler.update() | ||
return step_output | ||
return closure_result | ||
|
||
def clip_gradients( | ||
self, | ||
optimizer: Optimizer, | ||
clip_val: int | float = 0.0, | ||
gradient_clip_algorithm: GradClipAlgorithmType = GradClipAlgorithmType.NORM, | ||
) -> None: | ||
"""Handle grad clipping with scaler.""" | ||
if clip_val > 0 and _optimizer_handles_unscaling(optimizer): | ||
msg = f"The current optimizer, {type(optimizer).__qualname__}, does not allow for gradient clipping" | ||
" because it performs unscaling of gradients internally. HINT: Are you using a 'fused' optimizer?" | ||
raise RuntimeError( | ||
msg, | ||
) | ||
super().clip_gradients(optimizer=optimizer, clip_val=clip_val, gradient_clip_algorithm=gradient_clip_algorithm) | ||
|
||
@contextmanager | ||
def forward_context(self) -> Generator[None, None, None]: | ||
"""Enable autocast context.""" | ||
with torch.xpu.autocast(True): | ||
yield | ||
|
||
def state_dict(self) -> dict[str, Any]: | ||
"""Returns state dict of the plugin.""" | ||
if self.scaler is not None: | ||
return self.scaler.state_dict() | ||
return {} | ||
|
||
def load_state_dict(self, state_dict: dict[str, torch.Tensor]) -> None: | ||
"""Loads state dict to the plugin.""" | ||
if self.scaler is not None: | ||
self.scaler.load_state_dict(state_dict) | ||
|
||
|
||
def _optimizer_handles_unscaling(optimizer: torch.optim.Optimizer) -> bool: | ||
"""Determines if a PyTorch optimizer handles unscaling gradients in the step method ratherthan through the scaler. | ||
Since, the current implementation of this function checks a PyTorch internal variable on the optimizer, the return | ||
value will only be reliable for built-in PyTorch optimizers. | ||
""" | ||
return getattr(optimizer, "_step_supports_amp_scaling", False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
"""Lightning strategy for single XPU device.""" | ||
|
||
from .xpu_single import SingleXPUStrategy | ||
|
||
__all__ = ["SingleXPUStrategy"] |
Oops, something went wrong.