From fd584606b6d2308373b2160ad118a5b9d87f33c4 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 7 Feb 2024 12:48:02 -0800
Subject: [PATCH] Revert D53247301: Set `fold_quantize` to True in `convert_pt2e`

Differential Revision: D53247301

Original commit changeset: 5b2dbbc76487

Original Phabricator Diff: D53247301

fbshipit-source-id: 6270c48ff97c5a44bd47e6c5c40df82d1bec4644
---
 backends/arm/test/test_tosa.py              | 2 +-
 backends/example/test_example_delegate.py   | 4 ++--
 backends/qualcomm/tests/utils.py            | 2 +-
 backends/xnnpack/test/test_xnnpack_utils.py | 2 +-
 backends/xnnpack/test/tester/tester.py      | 2 +-
 examples/models/llama2/quantize.py          | 1 -
 examples/qualcomm/scripts/export_example.py | 2 +-
 examples/qualcomm/scripts/utils.py          | 2 +-
 examples/xnnpack/quantization/example.py    | 2 +-
 examples/xnnpack/quantization/utils.py      | 2 +-
 examples/xtensa/aot/export_example.py       | 2 +-
 exir/tests/test_quantization.py             | 2 +-
 12 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/backends/arm/test/test_tosa.py b/backends/arm/test/test_tosa.py
index 56d58a021b..6d0e14bc23 100644
--- a/backends/arm/test/test_tosa.py
+++ b/backends/arm/test/test_tosa.py
@@ -94,7 +94,7 @@ def prepare_model_and_ref(test_model, profile=TosaProfile.MI):
     # Quantize
     prepared_model = prepare_pt2e(captured_model_graph_module, quantizer)
     prepared_model(*model.inputs[profile])
-    model = convert_pt2e(prepared_model)
+    model = convert_pt2e(prepared_model, fold_quantize=True)
     model_outputs = model.forward(*model_inputs)
 
     return model, model_inputs, model_outputs
diff --git a/backends/example/test_example_delegate.py b/backends/example/test_example_delegate.py
index 27354e02ad..fd79962832 100644
--- a/backends/example/test_example_delegate.py
+++ b/backends/example/test_example_delegate.py
@@ -56,7 +56,7 @@ def get_example_inputs():
         m = prepare_pt2e(m, quantizer)
         # calibration
         m(*example_inputs)
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
         quantized_gm = m
 
         exported_program = exir.capture(
@@ -88,7 +88,7 @@ def test_delegate_mobilenet_v2(self):
         m = prepare_pt2e(m, quantizer)
         # calibration
         m(*example_inputs)
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
         quantized_gm = m
 
         exported_program = exir.capture(
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index b55608bc9e..eadbba33af 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -172,7 +172,7 @@ def get_qdq_module(
 
         prepared = prepare_pt2e(m, quantizer)
         prepared(*inputs)
-        quantized_module = convert_pt2e(prepared)
+        quantized_module = convert_pt2e(prepared, fold_quantize=True)
         nodes = {node.target for node in quantized_module.graph.nodes}
         q_and_dq = {
             torch.ops.quantized_decomposed.quantize_per_tensor.default,
diff --git a/backends/xnnpack/test/test_xnnpack_utils.py b/backends/xnnpack/test/test_xnnpack_utils.py
index c6b1513d31..d28a5c4e29 100644
--- a/backends/xnnpack/test/test_xnnpack_utils.py
+++ b/backends/xnnpack/test/test_xnnpack_utils.py
@@ -324,7 +324,7 @@ def quantize_and_test_model_with_quantizer(
         quantization_config = get_symmetric_quantization_config()
         quantizer.set_global(quantization_config)
         prepared = prepare_pt2e(m, quantizer)
-        converted = convert_pt2e(prepared)
+        converted = convert_pt2e(prepared, fold_quantize=True)
 
         captured_program = exir.capture(
             converted,
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index b2701a5f44..3635555b08 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -154,7 +154,7 @@ def run(
             # Calibrate prepared model to provide data to quantization observers.
             prepared(*inputs)
 
-        converted = convert_pt2e(prepared)
+        converted = convert_pt2e(prepared, fold_quantize=True)
         self.converted_graph = converted
 
     @property
diff --git a/examples/models/llama2/quantize.py b/examples/models/llama2/quantize.py
index cd5d4648ea..4c1257e5b1 100644
--- a/examples/models/llama2/quantize.py
+++ b/examples/models/llama2/quantize.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
 from typing import Dict, Tuple
 
 import torch
diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py
index 9e9ade0502..c76f97c02b 100644
--- a/examples/qualcomm/scripts/export_example.py
+++ b/examples/qualcomm/scripts/export_example.py
@@ -51,7 +51,7 @@
     # Calibration
     m(*example_inputs)
     # Get the quantized model
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
 
     # Capture program for edge IR
     edge_program = capture_program(m, example_inputs)
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
index cc1089881d..a7a6f1adb9 100755
--- a/examples/qualcomm/scripts/utils.py
+++ b/examples/qualcomm/scripts/utils.py
@@ -159,7 +159,7 @@ def build_executorch_binary(
         # calibration
        for data in dataset:
             annotated_model(*data)
-        quantized_model = convert_pt2e(annotated_model)
+        quantized_model = convert_pt2e(annotated_model, fold_quantize=True)
 
         edge_prog = capture_program(quantized_model, inputs)
     else:
diff --git a/examples/xnnpack/quantization/example.py b/examples/xnnpack/quantization/example.py
index 9a7d22408a..fa61e3e616 100644
--- a/examples/xnnpack/quantization/example.py
+++ b/examples/xnnpack/quantization/example.py
@@ -66,7 +66,7 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
     # calibration
     after_prepare_result = m(*example_inputs)
     logging.info(f"prepare_pt2e: {m}")
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
     after_quant_result = m(*example_inputs)
 
     # 2. the previous fx graph mode quantization reference flow
diff --git a/examples/xnnpack/quantization/utils.py b/examples/xnnpack/quantization/utils.py
index 6f8aa3913f..b274f040d7 100644
--- a/examples/xnnpack/quantization/utils.py
+++ b/examples/xnnpack/quantization/utils.py
@@ -23,7 +23,7 @@ def quantize(model, example_inputs):
     m = prepare_pt2e(model, quantizer)
     # calibration
     m(*example_inputs)
-    m = convert_pt2e(m)
+    m = convert_pt2e(m, fold_quantize=True)
     logging.info(f"Quantized model: {m}")
     # make sure we can export to flat buffer
     return m
diff --git a/examples/xtensa/aot/export_example.py b/examples/xtensa/aot/export_example.py
index a9e2fb9c64..aee219ee4a 100644
--- a/examples/xtensa/aot/export_example.py
+++ b/examples/xtensa/aot/export_example.py
@@ -60,7 +60,7 @@ def forward(self, x: torch.Tensor):
     prepared_model(*example_inputs)
 
     # Convert
-    converted_model = convert_pt2e(prepared_model)
+    converted_model = convert_pt2e(prepared_model, fold_quantize=True)
 
     # pyre-fixme[16]: Pyre doesn't get that XtensaQuantizer has a patterns attribute
     patterns = [q.pattern for q in quantizer.quantizers]
diff --git a/exir/tests/test_quantization.py b/exir/tests/test_quantization.py
index ec61b3dffa..5fb3cb676d 100644
--- a/exir/tests/test_quantization.py
+++ b/exir/tests/test_quantization.py
@@ -63,7 +63,7 @@ def test_resnet(self) -> None:
             id(m.activation_post_process_3), id(m.activation_post_process_2)
         )
         after_prepare_result = m(*example_inputs)[0]
-        m = convert_pt2e(m)
+        m = convert_pt2e(m, fold_quantize=True)
 
         # TODO: conv, conv_relu, linear delegation
         # quantized ops to implement: add_relu
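
Note (not part of the patch): every call site touched above follows the same PT2E quantization sequence — export the model, annotate it with a quantizer, calibrate, then convert — and this revert restores the explicit `fold_quantize=True` argument at the convert step. Below is a minimal sketch of that sequence, assuming the `torch.ao.quantization` PT2E APIs as they existed around PyTorch 2.2; the toy model, example input, and the choice of `XNNPACKQuantizer` are illustrative and not taken from the patch.

# Minimal PT2E quantization sketch (illustrative; toy model and quantizer choice are assumptions).
import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 8),)

# Export to an ATen-level graph, annotate it with the quantizer, then calibrate.
m = capture_pre_autograd_graph(model, example_inputs)
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())
m = prepare_pt2e(m, quantizer)
m(*example_inputs)  # calibration

# fold_quantize=True constant-folds the quantize ops on weights so they are stored
# as quantized tensors; this is the argument the revert re-adds at each call site.
m = convert_pt2e(m, fold_quantize=True)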