Skip to content

Commit

Permalink
Separate quantize and export_to_edge in builder (pytorch#3613)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#3613

Currently, export_to_edge both applies the quantizers and runs to_edge. Separate the two steps so that quantization alone can be called from eval_llama.py.

Reviewed By: Jack-Khuu, larryliu0820

Differential Revision: D57367832

fbshipit-source-id: 04d225df5403657cd86726af8cffb77c7c41147e
  • Loading branch information
cccclai authored and facebook-github-bot committed May 16, 2024
1 parent 36f83eb commit 0364d45
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 13 deletions.
45 changes: 35 additions & 10 deletions examples/models/llama2/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def __init__(
verbose: bool = False,
):
self.model = model
# graph module returned from capture_pre_autograd_graph
self.pre_autograd_graph_module: Optional[torch.fx.GraphModule] = None
self.modelname = modelname
self.weight_type = weight_type
self.dtype = dtype
Expand Down Expand Up @@ -251,25 +253,27 @@ def _get_metadata(self):
self.metadata = metadata
return self.metadata

def export_to_edge(
def pt2e_quantize(
self, quantizers: Optional[List[Quantizer]]
) -> "LlamaEdgeManager":
"""
Export the model to Edge dialect and retrieve a EdgeManager.
Quantize the model via pt2e flow and retrieve LlamaEdgeManager including the quantized model.
Args:
quantizers (Optional[List[Quantizer]]): A list of quantizers.
"""
assert (
self.edge_manager is None
), "export_to_edge is already called, please call pt2e_quantize before export_to_edge"
logging.info(f"Using pt2e {quantizers} to quantizing the model...")
dynamic_shape = self._get_dynamic_shape()
edge_config = self._get_edge_config()
metadata = self._get_metadata()

# 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
# 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
m = capture_pre_autograd_graph(
self.model, self.example_inputs, dynamic_shapes=dynamic_shape
)
if quantizers:
if quantizers:
with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
m = capture_pre_autograd_graph(
self.model, self.example_inputs, dynamic_shapes=dynamic_shape
)
if self.verbose:
logging.info(f"Applied quantizers: {quantizers}")
composed_quantizer = ComposableQuantizer(quantizers)
Expand All @@ -278,8 +282,29 @@ def export_to_edge(
m(*self.example_inputs)
m = convert_pt2e(m)
DuplicateDynamicQuantChainPass()(m)
self.pre_autograd_graph_module = m
return self
else:
logging.info("No quantizer provided, passing...")
return self

def export_to_edge(self) -> "LlamaEdgeManager":
"""
Export the model to Edge dialect and retrieve a LlamaEdgeManager.
"""
dynamic_shape = self._get_dynamic_shape()
edge_config = self._get_edge_config()
metadata = self._get_metadata()

# 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
# 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
if self.pre_autograd_graph_module is None:
self.pre_autograd_graph_module = capture_pre_autograd_graph(
self.model, self.example_inputs, dynamic_shapes=dynamic_shape
)
self.edge_manager = export_to_edge(
m,
self.pre_autograd_graph_module,
self.example_inputs,
dynamic_shapes=dynamic_shape,
edge_constant_methods=metadata,
Expand Down
8 changes: 5 additions & 3 deletions examples/models/llama2/export_llama_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,11 @@ def _export_llama(modelname, args) -> str: # noqa: C901
qnn_quantizer, quant_dtype = get_qnn_quantizer(args)
quantizers.append(qnn_quantizer)

builder_exported_to_edge = _prepare_for_llama_export(
modelname, args
).export_to_edge(quantizers)
builder_exported_to_edge = (
_prepare_for_llama_export(modelname, args)
.pt2e_quantize(quantizers)
.export_to_edge()
)

modelname = builder_exported_to_edge.modelname

Expand Down

0 comments on commit 0364d45

Please sign in to comment.