Skip to content

Commit

Permalink
Automatic generation of calibration datasets (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil authored Jan 15, 2024
1 parent e372f64 commit ef70214
Show file tree
Hide file tree
Showing 14 changed files with 386 additions and 213 deletions.
31 changes: 31 additions & 0 deletions examples/neural_compressor_ptq_bert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
defaults:
- launcher: process
- benchmark: inference
- backend: neural-compressor
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- override hydra/job_logging: colorlog # colorful logging
- override hydra/hydra_logging: colorlog # colorful logging

experiment_name: neural_compressor_ptq_bert
model: bert-base-uncased
device: cpu

backend:
no_weights: true
ptq_quantization: true
calibration: true

benchmark:
input_shapes:
batch_size: 1

hydra:
run:
dir: runs/${experiment_name}
sweep:
dir: sweeps/${experiment_name}
job:
chdir: true
env_set:
OVERRIDE_BENCHMARKS: 1
32 changes: 32 additions & 0 deletions examples/onnxruntime_static_quant_vit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
defaults:
- launcher: process
- benchmark: inference
- backend: onnxruntime
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- override hydra/job_logging: colorlog # colorful logging
- override hydra/hydra_logging: colorlog # colorful logging

experiment_name: onnxruntime_static_quant_vit
model: google/vit-base-patch16-224
device: cpu

backend:
quantization: true
quantization_config:
is_static: true
per_channel: false

calibration: true

hydra:
run:
dir: runs/${experiment_name}
sweep:
dir: sweeps/${experiment_name}
job:
chdir: true
env_set:
OVERRIDE_BENCHMARKS: 1
CUDA_VISIBLE_DEVICES: 0
CUDA_DEVICE_ORDER: PCI_BUS_ID
13 changes: 5 additions & 8 deletions examples/openvino_diffusion.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
defaults:
- backend: openvino # default backend
- launcher: inline # default launcher
- benchmark: inference # default benchmark
- backend: openvino
- launcher: process
- benchmark: inference
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- override hydra/job_logging: colorlog # colorful logging
- override hydra/hydra_logging: colorlog # colorful logging

experiment_name: openvino_diffusion
model: stabilityai/stable-diffusion-2-1
experiment_name: openvino_diffusion
device: cpu

launcher:
device_isolation: true

backend:
export: true
reshape: true
export: true
half: true

benchmark:
Expand Down
33 changes: 33 additions & 0 deletions examples/openvino_static_quant_bert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
defaults:
- backend: openvino
- launcher: process
- benchmark: inference
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- override hydra/job_logging: colorlog # colorful logging
- override hydra/hydra_logging: colorlog # colorful logging

experiment_name: openvino_static_quant_bert
model: bert-base-uncased
device: cpu

backend:
export: true
no_weights: true
quantization: true
calibration: true
reshape: true

benchmark:
input_shapes:
batch_size: 1

hydra:
run:
dir: runs/${experiment_name}
sweep:
dir: sweeps/${experiment_name}
job:
chdir: true
env_set:
OVERRIDE_BENCHMARKS: 1
6 changes: 3 additions & 3 deletions examples/pytorch_bert.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defaults:
- backend: pytorch # default backend
- launcher: torchrun # default launcher
- benchmark: inference # default benchmark
- backend: pytorch
- launcher: process
- benchmark: inference
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- override hydra/job_logging: colorlog # colorful logging
Expand Down
80 changes: 66 additions & 14 deletions optimum_benchmark/backends/neural_compressor/backend.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
import gc
import os
from logging import getLogger
from tempfile import TemporaryDirectory
from typing import Any, Dict

import torch
from hydra.utils import get_class
from neural_compressor.config import (
AccuracyCriterion,
PostTrainingQuantConfig,
TuningCriterion,
)
from optimum.intel.neural_compressor.quantization import INCQuantizer
from transformers.modeling_utils import no_init_weights
from transformers.utils.logging import set_verbosity_error

from ...generators.dataset_generator import DatasetGenerator
from ..base import Backend
from .config import INCConfig
from .utils import TASKS_TO_INCMODELS

LOGGER = getLogger("neural-compressor")

# disable transformers logging
set_verbosity_error()


class INCBackend(Backend[INCConfig]):
NAME: str = "neural-compressor"
Expand Down Expand Up @@ -45,20 +53,65 @@ def configure(self, config: INCConfig) -> None:
self.tmpdir = TemporaryDirectory()

if self.config.ptq_quantization:
self.load_automodel_from_pretrained()
if self.config.no_weights:
self.load_automodel_with_no_weights()
else:
self.load_automodel_from_pretrained()
self.quantize_automodel()
self.delete_pretrained_model()
self.load_incmodel_from_pretrained()
elif self.config.no_weights:
self.load_incmodel_with_no_weights()
else:
self.load_incmodel_from_pretrained()

self.load_incmodel_from_pretrained()
self.tmpdir.cleanup()

def load_automodel_from_pretrained(self) -> None:
LOGGER.info("\t+ Loading AutoModel")
LOGGER.info("\t+ Loading AutoModel from pretrained")
self.pretrained_model = self.automodel_class.from_pretrained(self.model, **self.hub_kwargs)

def load_automodel_with_no_weights(self) -> None:
    """Load the AutoModel from a placeholder checkpoint instead of real weights.

    Writes ``self.pretrained_config`` and a dummy ``pytorch_model.bin`` (the
    state dict of a ``torch.nn.Linear(1, 1)``) into ``<tmpdir>/no_weights``,
    then calls ``load_automodel_from_pretrained`` with ``self.model``
    temporarily pointing at that directory, inside ``no_init_weights()``.

    ``self.model`` is always restored to its original value, including when
    loading raises.
    """
    no_weights_model = os.path.join(self.tmpdir.name, "no_weights")

    if not os.path.exists(no_weights_model):
        LOGGER.info("\t+ Creating no weights model directory")
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(no_weights_model, exist_ok=True)

    LOGGER.info("\t+ Saving pretrained config")
    self.pretrained_config.save_pretrained(save_directory=no_weights_model)

    LOGGER.info("\t+ Creating no weights model")
    # Placeholder state dict: it only needs to exist as a checkpoint file, its
    # keys are not the real model's parameters.
    state_dict = torch.nn.Linear(1, 1).state_dict()

    LOGGER.info("\t+ Saving no weights model")
    torch.save(state_dict, os.path.join(no_weights_model, "pytorch_model.bin"))

    LOGGER.info("\t+ Loading no weights model")
    original_model = self.model
    self.model = no_weights_model
    try:
        with no_init_weights():
            self.load_automodel_from_pretrained()
    finally:
        # Restore even on failure so self.model never keeps pointing at the
        # temporary directory.
        self.model = original_model

def load_incmodel_from_pretrained(self) -> None:
LOGGER.info("\t+ Loading INCModel")
LOGGER.info("\t+ Loading INCModel from pretrained")
self.pretrained_model = self.incmodel_class.from_pretrained(self.model, **self.hub_kwargs)

def load_incmodel_with_no_weights(self) -> None:
    """Load the INCModel from the placeholder ``<tmpdir>/no_weights`` checkpoint.

    First calls ``load_automodel_with_no_weights`` (which writes the config and
    dummy checkpoint into that directory) and ``delete_pretrained_model``
    (presumably releasing the intermediate AutoModel; defined outside this
    view). It then calls ``load_incmodel_from_pretrained`` with ``self.model``
    temporarily pointing at the directory, inside ``no_init_weights()``.

    ``self.model`` is always restored to its original value, including when
    loading raises.
    """
    no_weights_model = os.path.join(self.tmpdir.name, "no_weights")

    LOGGER.info("\t+ Loading AutoModel with no weights")
    self.load_automodel_with_no_weights()
    self.delete_pretrained_model()

    LOGGER.info("\t+ Loading INCModel with no weights")
    original_model = self.model
    self.model = no_weights_model
    try:
        with no_init_weights():
            self.load_incmodel_from_pretrained()
    finally:
        # Restore even on failure so self.model never keeps pointing at the
        # temporary directory.
        self.model = original_model

def quantize_automodel(self) -> None:
LOGGER.info("\t+ Attempting to quantize model")
quantized_model_path = f"{self.tmpdir.name}/quantized"
Expand All @@ -71,34 +124,33 @@ def quantize_automodel(self) -> None:
ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
LOGGER.info("\t+ Creating quantizer")
quantizer = INCQuantizer.from_pretrained(
self.pretrained_model,
task=self.task,
seed=self.config.seed,
model=self.pretrained_model,
# TODO: add support for these
eval_fn=None,
calibration_fn=None,
eval_fn=None,
)

if self.config.calibration:
LOGGER.info("\t+ Processing calibration config")
calibration_config = self.config.calibration_config.copy()
preprocess_class = get_class(calibration_config.pop("preprocess_class"))
calibration_config["preprocess_function"] = preprocess_class(model_name_or_path=self.model)
LOGGER.info("\t+ Loading calibration dataset")
calibration_dataset = quantizer.get_calibration_dataset(**calibration_config)
LOGGER.info("\t+ Generating calibration dataset")
dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
calibration_dataset = DatasetGenerator(task=self.task, dataset_shapes=dataset_shapes).generate()
columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
else:
calibration_dataset = None

LOGGER.info("\t+ Quantizing model")
quantizer.quantize(
quantization_config=ptq_quantization_config,
save_directory=quantized_model_path,
calibration_dataset=calibration_dataset,
quantization_config=ptq_quantization_config,
# TODO: add support for these
remove_unused_columns=True,
data_collator=None,
file_name=None,
batch_size=8,
batch_size=1,
)
self.model = quantized_model_path

Expand Down
16 changes: 3 additions & 13 deletions optimum_benchmark/backends/neural_compressor/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,15 @@
}


CALIBRATION_CONFIG = {
"dataset_name": "glue",
"num_samples": 300,
"dataset_config_name": "sst2",
"dataset_split": "train",
"preprocess_batch": True,
"preprocess_class": "optimum_benchmark.preprocessors.glue.GluePreprocessor",
}


@dataclass
class INCConfig(BackendConfig):
name: str = "neural_compressor"
version: str = "${neural_compressor_version:}"
_target_: str = "optimum_benchmark.backends.neural_compressor.backend.INCBackend"

# load options
no_weights: bool = False

# post-training quantization options
ptq_quantization: bool = False
ptq_quantization_config: Dict[str, Any] = field(default_factory=dict)
Expand All @@ -80,6 +73,3 @@ def __post_init__(self):
)
if self.ptq_quantization_config["approach"] == "static" and not self.calibration:
raise ValueError("Calibration must be enabled when using static quantization.")

if self.calibration:
self.calibration_config = OmegaConf.to_object(OmegaConf.merge(CALIBRATION_CONFIG, self.calibration_config))
Loading

0 comments on commit ef70214

Please sign in to comment.