From fd584606b6d2308373b2160ad118a5b9d87f33c4 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 7 Feb 2024 12:48:02 -0800
Subject: [PATCH] Revert D53247301: Set `fold_quantize` to True in `convert_pt2e`

Differential Revision: D53247301

Original commit changeset: 5b2dbbc76487

Original Phabricator Diff: D53247301

fbshipit-source-id: 6270c48ff97c5a44bd47e6c5c40df82d1bec4644
---
 backends/arm/test/test_tosa.py              | 2 +-
 backends/example/test_example_delegate.py   | 4 ++--
 backends/qualcomm/tests/utils.py            | 2 +-
 backends/xnnpack/test/test_xnnpack_utils.py | 2 +-
 backends/xnnpack/test/tester/tester.py      | 2 +-
 examples/models/llama2/quantize.py          | 1 -
 examples/qualcomm/scripts/export_example.py | 2 +-
 examples/qualcomm/scripts/utils.py          | 2 +-
 examples/xnnpack/quantization/example.py    | 2 +-
 examples/xnnpack/quantization/utils.py      | 2 +-
 examples/xtensa/aot/export_example.py       | 2 +-
 exir/tests/test_quantization.py             | 2 +-
 12 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/backends/arm/test/test_tosa.py b/backends/arm/test/test_tosa.py
index 56d58a021b..6d0e14bc23 100644
--- a/backends/arm/test/test_tosa.py
+++ b/backends/arm/test/test_tosa.py
@@ -94,7 +94,7 @@ def prepare_model_and_ref(test_model, profile=TosaProfile.MI):
     # Quantize
     prepared_model = prepare_pt2e(captured_model_graph_module, quantizer)
     prepared_model(*model.inputs[profile])
-    model = convert_pt2e(prepared_model)
+    model = convert_pt2e(prepared_model, fold_quantize=True)
     model_outputs = model.forward(*model_inputs)
 
     return model, model_inputs, model_outputs
diff --git a/backends/example/test_example_delegate.py b/backends/example/test_example_delegate.py
index 27354e02ad..fd79962832 100644
--- a/backends/example/test_example_delegate.py
+++ b/backends/example/test_example_delegate.py
@@ -56,7 +56,7 @@ def get_example_inputs():
         m = prepare_pt2e(m, quantizer)
         # calibration
         m(*example_inputs)
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
         quantized_gm = m
 
         exported_program = exir.capture(
@@ -88,7 +88,7 @@ def test_delegate_mobilenet_v2(self):
         m = prepare_pt2e(m, quantizer)
         # calibration
         m(*example_inputs)
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
         quantized_gm = m
 
         exported_program = exir.capture(
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index b55608bc9e..eadbba33af 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -172,7 +172,7 @@ def get_qdq_module(
 
         prepared = prepare_pt2e(m, quantizer)
         prepared(*inputs)
-        quantized_module = convert_pt2e(prepared)
+        quantized_module = convert_pt2e(prepared, fold_quantize=True)
         nodes = {node.target for node in quantized_module.graph.nodes}
         q_and_dq = {
             torch.ops.quantized_decomposed.quantize_per_tensor.default,
diff --git a/backends/xnnpack/test/test_xnnpack_utils.py b/backends/xnnpack/test/test_xnnpack_utils.py
index c6b1513d31..d28a5c4e29 100644
--- a/backends/xnnpack/test/test_xnnpack_utils.py
+++ b/backends/xnnpack/test/test_xnnpack_utils.py
@@ -324,7 +324,7 @@ def quantize_and_test_model_with_quantizer(
         quantization_config = get_symmetric_quantization_config()
         quantizer.set_global(quantization_config)
         prepared = prepare_pt2e(m, quantizer)
-        converted = convert_pt2e(prepared)
+        converted = convert_pt2e(prepared, fold_quantize=True)
 
         captured_program = exir.capture(
             converted,
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index b2701a5f44..3635555b08 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -154,7 +154,7 @@ def run(
             # Calibrate prepared model to provide data to quantization observers.
             prepared(*inputs)
 
-        converted = convert_pt2e(prepared)
+        converted = convert_pt2e(prepared, fold_quantize=True)
         self.converted_graph = converted
 
     @property
diff --git a/examples/models/llama2/quantize.py b/examples/models/llama2/quantize.py
index cd5d4648ea..4c1257e5b1 100644
--- a/examples/models/llama2/quantize.py
+++ b/examples/models/llama2/quantize.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
 from typing import Dict, Tuple
 
 import torch
diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py
index 9e9ade0502..c76f97c02b 100644
--- a/examples/qualcomm/scripts/export_example.py
+++ b/examples/qualcomm/scripts/export_example.py
@@ -51,7 +51,7 @@
     # Calibration
     m(*example_inputs)
     # Get the quantized model
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
 
     # Capture program for edge IR
     edge_program = capture_program(m, example_inputs)
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
index cc1089881d..a7a6f1adb9 100755
--- a/examples/qualcomm/scripts/utils.py
+++ b/examples/qualcomm/scripts/utils.py
@@ -159,7 +159,7 @@ def build_executorch_binary(
         # calibration
        for data in dataset:
             annotated_model(*data)
-        quantized_model = convert_pt2e(annotated_model)
+        quantized_model = convert_pt2e(annotated_model, fold_quantize=True)
 
         edge_prog = capture_program(quantized_model, inputs)
     else:
diff --git a/examples/xnnpack/quantization/example.py b/examples/xnnpack/quantization/example.py
index 9a7d22408a..fa61e3e616 100644
--- a/examples/xnnpack/quantization/example.py
+++ b/examples/xnnpack/quantization/example.py
@@ -66,7 +66,7 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
     # calibration
     after_prepare_result = m(*example_inputs)
     logging.info(f"prepare_pt2e: {m}")
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
     after_quant_result = m(*example_inputs)
 
     # 2. the previous fx graph mode quantization reference flow
diff --git a/examples/xnnpack/quantization/utils.py b/examples/xnnpack/quantization/utils.py
index 6f8aa3913f..b274f040d7 100644
--- a/examples/xnnpack/quantization/utils.py
+++ b/examples/xnnpack/quantization/utils.py
@@ -23,7 +23,7 @@ def quantize(model, example_inputs):
     m = prepare_pt2e(model, quantizer)
     # calibration
     m(*example_inputs)
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
     logging.info(f"Quantized model: {m}")
     # make sure we can export to flat buffer
     return m
diff --git a/examples/xtensa/aot/export_example.py b/examples/xtensa/aot/export_example.py
index a9e2fb9c64..aee219ee4a 100644
--- a/examples/xtensa/aot/export_example.py
+++ b/examples/xtensa/aot/export_example.py
@@ -60,7 +60,7 @@ def forward(self, x: torch.Tensor):
     prepared_model(*example_inputs)
 
     # Convert
-    converted_model = convert_pt2e(prepared_model)
+    converted_model = convert_pt2e(prepared_model, fold_quantize=True)
 
     # pyre-fixme[16]: Pyre doesn't get that XtensaQuantizer has a patterns attribute
     patterns = [q.pattern for q in quantizer.quantizers]
diff --git a/exir/tests/test_quantization.py b/exir/tests/test_quantization.py
index ec61b3dffa..5fb3cb676d 100644
--- a/exir/tests/test_quantization.py
+++ b/exir/tests/test_quantization.py
@@ -63,7 +63,7 @@ def test_resnet(self) -> None:
             id(m.activation_post_process_3), id(m.activation_post_process_2)
         )
         after_prepare_result = m(*example_inputs)[0]
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
 
         # TODO: conv, conv_relu, linear delegation
         # quantized ops to implement: add_relu
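
Note (not part of the patch): every call site touched above follows the same PT2E quantization sequence — export the model, annotate it with a quantizer, calibrate, then convert — and this revert restores the explicit `fold_quantize=True` argument at the convert step. Below is a minimal sketch of that sequence, assuming the `torch.ao.quantization` PT2E APIs as they existed around PyTorch 2.2; the toy model, example input, and the choice of `XNNPACKQuantizer` are illustrative and not taken from the patch.

# Minimal PT2E quantization sketch (illustrative; toy model and quantizer choice are assumptions).
import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 8),)

# Export to an ATen-level graph, annotate it with the quantizer, then calibrate.
m = capture_pre_autograd_graph(model, example_inputs)
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())
m = prepare_pt2e(m, quantizer)
m(*example_inputs)  # calibration

# fold_quantize=True constant-folds the quantize ops on weights so they are stored
# as quantized tensors; this is the argument the revert re-adds at each call site.
m = convert_pt2e(m, fold_quantize=True)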