Automatic generation of calibration datasets (#111)

huggingface · Jan 15, 2024 · ef70214 · ef70214
1 parent e372f64
commit ef70214
Show file tree

Hide file tree

Showing 14 changed files with 386 additions and 213 deletions.
diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
@@ -0,0 +1,31 @@
+defaults:
+  - launcher: process
+  - benchmark: inference
+  - backend: neural-compressor
+  - experiment # inheriting experiment schema
+  - _self_ # for hydra 1.1 compatibility
+  - override hydra/job_logging: colorlog # colorful logging
+  - override hydra/hydra_logging: colorlog # colorful logging
+
+experiment_name: neural_compressor_ptq_bert
+model: bert-base-uncased
+device: cpu
+
+backend:
+  no_weights: true
+  ptq_quantization: true
+  calibration: true
+
+benchmark:
+  input_shapes:
+    batch_size: 1
+
+hydra:
+  run:
+    dir: runs/${experiment_name}
+  sweep:
+    dir: sweeps/${experiment_name}
+  job:
+    chdir: true
+    env_set:
+      OVERRIDE_BENCHMARKS: 1
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/onnxruntime_static_quant_vit.yaml
@@ -0,0 +1,32 @@
+defaults:
+  - launcher: process
+  - benchmark: inference
+  - backend: onnxruntime
+  - experiment # inheriting experiment schema
+  - _self_ # for hydra 1.1 compatibility
+  - override hydra/job_logging: colorlog # colorful logging
+  - override hydra/hydra_logging: colorlog # colorful logging
+
+experiment_name: onnxruntime_static_quant_vit
+model: google/vit-base-patch16-224
+device: cpu
+
+backend:
+  quantization: true
+  quantization_config:
+    is_static: true
+    per_channel: false
+
+  calibration: true
+
+hydra:
+  run:
+    dir: runs/${experiment_name}
+  sweep:
+    dir: sweeps/${experiment_name}
+  job:
+    chdir: true
+    env_set:
+      OVERRIDE_BENCHMARKS: 1
+      CUDA_VISIBLE_DEVICES: 0
+      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/openvino_diffusion.yaml b/examples/openvino_diffusion.yaml
@@ -1,22 +1,19 @@
 defaults:
-  - backend: openvino # default backend
-  - launcher: inline # default launcher
-  - benchmark: inference # default benchmark
+  - backend: openvino
+  - launcher: process
+  - benchmark: inference
   - experiment # inheriting experiment schema
   - _self_ # for hydra 1.1 compatibility
   - override hydra/job_logging: colorlog # colorful logging
   - override hydra/hydra_logging: colorlog # colorful logging
 
-experiment_name: openvino_diffusion
 model: stabilityai/stable-diffusion-2-1
+experiment_name: openvino_diffusion
 device: cpu
 
-launcher:
-  device_isolation: true
-
 backend:
-  export: true
   reshape: true
+  export: true
   half: true
 
 benchmark:

diff --git a/examples/openvino_static_quant_bert.yaml b/examples/openvino_static_quant_bert.yaml
@@ -0,0 +1,33 @@
+defaults:
+  - backend: openvino
+  - launcher: process
+  - benchmark: inference
+  - experiment # inheriting experiment schema
+  - _self_ # for hydra 1.1 compatibility
+  - override hydra/job_logging: colorlog # colorful logging
+  - override hydra/hydra_logging: colorlog # colorful logging
+
+experiment_name: openvino_static_quant_bert
+model: bert-base-uncased
+device: cpu
+
+backend:
+  export: true
+  no_weights: true
+  quantization: true
+  calibration: true
+  reshape: true
+
+benchmark:
+  input_shapes:
+    batch_size: 1
+
+hydra:
+  run:
+    dir: runs/${experiment_name}
+  sweep:
+    dir: sweeps/${experiment_name}
+  job:
+    chdir: true
+    env_set:
+      OVERRIDE_BENCHMARKS: 1
diff --git a/examples/pytorch_bert.yaml b/examples/pytorch_bert.yaml
@@ -1,7 +1,7 @@
 defaults:
-  - backend: pytorch # default backend
-  - launcher: torchrun # default launcher
-  - benchmark: inference # default benchmark
+  - backend: pytorch
+  - launcher: process
+  - benchmark: inference
   - experiment # inheriting experiment schema
   - _self_ # for hydra 1.1 compatibility
   - override hydra/job_logging: colorlog # colorful logging

diff --git a/optimum_benchmark/backends/neural_compressor/backend.py b/optimum_benchmark/backends/neural_compressor/backend.py
@@ -1,22 +1,30 @@
 import gc
+import os
 from logging import getLogger
 from tempfile import TemporaryDirectory
 from typing import Any, Dict
 
+import torch
 from hydra.utils import get_class
 from neural_compressor.config import (
     AccuracyCriterion,
     PostTrainingQuantConfig,
     TuningCriterion,
 )
 from optimum.intel.neural_compressor.quantization import INCQuantizer
+from transformers.modeling_utils import no_init_weights
+from transformers.utils.logging import set_verbosity_error
 
+from ...generators.dataset_generator import DatasetGenerator
 from ..base import Backend
 from .config import INCConfig
 from .utils import TASKS_TO_INCMODELS
 
 LOGGER = getLogger("neural-compressor")
 
+# only surface transformers errors (silences its info/warning logs)
+set_verbosity_error()
+
 
 class INCBackend(Backend[INCConfig]):
     NAME: str = "neural-compressor"
@@ -45,20 +53,65 @@ def configure(self, config: INCConfig) -> None:
         self.tmpdir = TemporaryDirectory()
 
         if self.config.ptq_quantization:
-            self.load_automodel_from_pretrained()
+            if self.config.no_weights:
+                self.load_automodel_with_no_weights()
+            else:
+                self.load_automodel_from_pretrained()
             self.quantize_automodel()
             self.delete_pretrained_model()
+            self.load_incmodel_from_pretrained()
+        elif self.config.no_weights:
+            self.load_incmodel_with_no_weights()
+        else:
+            self.load_incmodel_from_pretrained()
 
-        self.load_incmodel_from_pretrained()
+        self.tmpdir.cleanup()
 
     def load_automodel_from_pretrained(self) -> None:
-        LOGGER.info("\t+ Loading AutoModel")
+        LOGGER.info("\t+ Loading AutoModel from pretrained")
         self.pretrained_model = self.automodel_class.from_pretrained(self.model, **self.hub_kwargs)
 
+    def load_automodel_with_no_weights(self) -> None:
+        no_weights_model = os.path.join(self.tmpdir.name, "no_weights")
+
+        if not os.path.exists(no_weights_model):
+            LOGGER.info("\t+ Creating no weights model directory")
+            os.makedirs(no_weights_model)
+
+        LOGGER.info("\t+ Saving pretrained config")
+        self.pretrained_config.save_pretrained(save_directory=no_weights_model)
+
+        LOGGER.info("\t+ Creating no weights model")
+        state_dict = torch.nn.Linear(1, 1).state_dict()
+
+        LOGGER.info("\t+ Saving no weights model")
+        torch.save(state_dict, os.path.join(no_weights_model, "pytorch_model.bin"))
+
+        LOGGER.info("\t+ Loading no weights model")
+        with no_init_weights():
+            original_model = self.model
+            self.model = no_weights_model
+            self.load_automodel_from_pretrained()
+            self.model = original_model
+
     def load_incmodel_from_pretrained(self) -> None:
-        LOGGER.info("\t+ Loading INCModel")
+        LOGGER.info("\t+ Loading INCModel from pretrained")
         self.pretrained_model = self.incmodel_class.from_pretrained(self.model, **self.hub_kwargs)
 
+    def load_incmodel_with_no_weights(self) -> None:
+        no_weights_model = os.path.join(self.tmpdir.name, "no_weights")
+
+        LOGGER.info("\t+ Loading AutoModel with no weights")
+        self.load_automodel_with_no_weights()
+        self.delete_pretrained_model()
+
+        LOGGER.info("\t+ Loading INCModel with no weights")
+        with no_init_weights():
+            original_model = self.model
+            self.model = no_weights_model
+            self.load_incmodel_from_pretrained()
+            self.model = original_model
+
     def quantize_automodel(self) -> None:
         LOGGER.info("\t+ Attempting to quantize model")
         quantized_model_path = f"{self.tmpdir.name}/quantized"
@@ -71,34 +124,33 @@ def quantize_automodel(self) -> None:
         ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
         LOGGER.info("\t+ Creating quantizer")
         quantizer = INCQuantizer.from_pretrained(
-            self.pretrained_model,
             task=self.task,
             seed=self.config.seed,
+            model=self.pretrained_model,
             # TODO: add support for these
-            eval_fn=None,
             calibration_fn=None,
+            eval_fn=None,
         )
 
         if self.config.calibration:
-            LOGGER.info("\t+ Processing calibration config")
-            calibration_config = self.config.calibration_config.copy()
-            preprocess_class = get_class(calibration_config.pop("preprocess_class"))
-            calibration_config["preprocess_function"] = preprocess_class(model_name_or_path=self.model)
-            LOGGER.info("\t+ Loading calibration dataset")
-            calibration_dataset = quantizer.get_calibration_dataset(**calibration_config)
+            LOGGER.info("\t+ Generating calibration dataset")
+            dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
+            calibration_dataset = DatasetGenerator(task=self.task, dataset_shapes=dataset_shapes).generate()
+            columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
+            calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
         else:
             calibration_dataset = None
 
         LOGGER.info("\t+ Quantizing model")
         quantizer.quantize(
-            quantization_config=ptq_quantization_config,
             save_directory=quantized_model_path,
             calibration_dataset=calibration_dataset,
+            quantization_config=ptq_quantization_config,
             # TODO: add support for these
             remove_unused_columns=True,
             data_collator=None,
             file_name=None,
-            batch_size=8,
+            batch_size=1,
         )
         self.model = quantized_model_path
 

diff --git a/optimum_benchmark/backends/neural_compressor/config.py b/optimum_benchmark/backends/neural_compressor/config.py
@@ -47,22 +47,15 @@
 }
 
 
-CALIBRATION_CONFIG = {
-    "dataset_name": "glue",
-    "num_samples": 300,
-    "dataset_config_name": "sst2",
-    "dataset_split": "train",
-    "preprocess_batch": True,
-    "preprocess_class": "optimum_benchmark.preprocessors.glue.GluePreprocessor",
-}
-
-
 @dataclass
 class INCConfig(BackendConfig):
     name: str = "neural_compressor"
     version: str = "${neural_compressor_version:}"
     _target_: str = "optimum_benchmark.backends.neural_compressor.backend.INCBackend"
 
+    # load options
+    no_weights: bool = False
+
     # post-training quantization options
     ptq_quantization: bool = False
     ptq_quantization_config: Dict[str, Any] = field(default_factory=dict)
@@ -80,6 +73,3 @@ def __post_init__(self):
             )
             if self.ptq_quantization_config["approach"] == "static" and not self.calibration:
                 raise ValueError("Calibration must be enabled when using static quantization.")
-
-        if self.calibration:
-            self.calibration_config = OmegaConf.to_object(OmegaConf.merge(CALIBRATION_CONFIG, self.calibration_config))