Qualcomm AI Engine Direct - LLAMA2 Infrastructure (pytorch#2020)
Summary:

1. Ops
- Add pow_tensor_scalar op
- Add rsqrt op
- Add sigmoid op
- Refine axis handling of cat op
- Refine parameter-related functions

2. Passes
- Add AnnotateDecomposed for unbind and stack ops
- Add DecomposeSilu for quantizer
- Add ReplaceInfBuffer for quantizer
- Rename pass ConvertAddmmmmWithLinear to ConvertToLinear
- Rename pass ConvertScaledDotProductAttention to DecomposeScaledDotProductAttention
- Support more args for the sdpa op in DecomposeScaledDotProductAttention
- Support the mm case in ConvertToLinear
- Move q_ops and dq_ops to pass/utils.py

3. Tests
- Add dummy llama2 test script
- Add single-op test cases

4. Others
- Fix error when popping a missing buffer
- Reorder test models
- Reorder ops in qnn_constants

Pull Request resolved: pytorch#2020
Reviewed By: kirklandsign
Differential Revision: D54010593
Pulled By: cccclai
fbshipit-source-id: 657994dc223cb9bd88a263bfc2479295384fcb4d
1 parent 8fed60b · commit f707590 · 28 changed files with 1,163 additions and 479 deletions.
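The DecomposeSilu pass listed in the summary rewrites silu(x) into x * sigmoid(x), so the quantizer and the backend only ever see primitive ops; the new sigmoid visitor below is what makes the rewritten graph lowerable. A minimal torch.fx sketch of that idea (an illustration of the technique, not the pass as implemented in this PR):

import torch
import torch.fx


def decompose_silu(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    # silu(x) == x * sigmoid(x); replace each silu call so only
    # primitive sigmoid/mul ops reach the quantizer and backend.
    for node in list(gm.graph.nodes):
        if node.op == "call_function" and node.target is torch.ops.aten.silu.default:
            (x,) = node.args
            with gm.graph.inserting_before(node):
                sig = gm.graph.call_function(torch.ops.aten.sigmoid.default, (x,))
                mul = gm.graph.call_function(torch.ops.aten.mul.Tensor, (x, sig))
            node.replace_all_uses_with(mul)
            gm.graph.erase_node(node)
    gm.graph.lint()
    gm.recompile()
    return gm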
@@ -0,0 +1,92 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch
from executorch.exir.dialects._ops import ops as exir_ops

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWisePower, QNN_OP_PACKAGE_NAME_QTI_AISW


# TODO: add more classes like PowTensorTensor if needed
@register_node_visitor
class PowTensorScalar(NodeVisitor):
    target = "aten.pow.Tensor_Scalar"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        out_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            out_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        pow_output_tensors = [output_tensor_wrapper]

        # tensor input
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)

        tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE

        input_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            tensor_type,
            nodes_to_wrappers,
        )

        # scalar input: materialize the exponent as a static tensor so
        # QNN's element-wise power op can consume it as a second input
        scalar = node.args[1]
        scalar_tensor = torch.full(input_tensor.size(), scalar).to(torch.float32)

        # 'graph', 'name', 'op', 'target', 'args', and 'kwargs'
        scalar_node = torch.fx.Node(
            node.graph,
            node.name + "_runtime_scalar",
            "call_function",
            exir_ops.edge.aten.full.default,
            (),  # args
            {},  # kwargs
        )

        if pow_quant_attrs := node.meta.get("quant_attrs"):
            quant_attrs = pow_quant_attrs.copy()
            quant_range = quant_attrs["quant_max"] - quant_attrs["quant_min"]
            # choose (scale, zero_point) so the constant scalar is exactly
            # representable at one end of the quantized range
            quant_attrs["zero_point"] = 0 if scalar >= 0 else quant_attrs["quant_max"]
            quant_attrs["scale"] = (
                scalar / quant_range if scalar >= 0 else -scalar / quant_range
            )
            scalar_node.meta["quant_attrs"] = quant_attrs

        scalar_tensor_wrapper = self.define_tensor(
            scalar_node,
            scalar_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
            nodes_to_wrappers,
        )

        pow_input_tensors = [input_tensor_wrapper, scalar_tensor_wrapper]

        pow_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWisePower.op_name,
        )
        pow_op.AddInputTensors(pow_input_tensors)
        pow_op.AddOutputTensors(pow_output_tensors)

        return pow_op
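The quant_attrs branch above picks scale and zero_point so the constant exponent lands exactly on one end of the quantized range. A quick sanity check of that arithmetic (assuming a uint8 range of [0, 255] and the usual dequantization (q - zero_point) * scale; the ranges here are illustrative, not taken from the PR):

quant_min, quant_max = 0, 255  # assumed uint8 range
for scalar in (2.0, -0.5):
    quant_range = quant_max - quant_min
    zero_point = 0 if scalar >= 0 else quant_max
    scale = scalar / quant_range if scalar >= 0 else -scalar / quant_range
    # positive scalars land on quant_max, negative ones on quant_min
    q = quant_max if scalar >= 0 else quant_min
    assert abs((q - zero_point) * scale - scalar) < 1e-9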
@@ -0,0 +1,55 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWiseRsqrt, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Rsqrt(NodeVisitor):
    target = "aten.rsqrt.default"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        # single tensor input
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)
        rsqrt_inp_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        rsqrt_input_tensors = [rsqrt_inp_tensor_wrapper]

        output_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            output_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        rsqrt_output_tensors = [output_tensor_wrapper]

        # map aten.rsqrt.default onto QNN's element-wise rsqrt op
        rsqrt_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWiseRsqrt.op_name,
        )
        rsqrt_op.AddInputTensors(rsqrt_input_tensors)
        rsqrt_op.AddOutputTensors(rsqrt_output_tensors)

        return rsqrt_op
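For context, aten.rsqrt.default is exactly what llama2-style RMSNorm layers export to, which is why this visitor belongs to the LLAMA2 infrastructure. A hedged illustration of how the op surfaces in an exported graph (the module, shapes, and eps are assumptions for the example, and it presumes a torch version with torch.export):

import torch


class RMSNorm(torch.nn.Module):
    def forward(self, x):
        # normalizing by the root-mean-square introduces rsqrt
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + 1e-6)


ep = torch.export.export(RMSNorm(), (torch.randn(1, 8, 16),))
print(ep.graph)  # includes aten.rsqrt.default, handled by the visitor above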
@@ -0,0 +1,55 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpSigmoid, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Sigmoid(NodeVisitor):
    target = "aten.sigmoid.default"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        # single tensor input
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)
        sigmoid_inp_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        sigmoid_input_tensors = [sigmoid_inp_tensor_wrapper]

        output_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            output_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        sigmoid_output_tensors = [output_tensor_wrapper]

        # map aten.sigmoid.default onto QNN's sigmoid op
        sigmoid_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpSigmoid.op_name,
        )
        sigmoid_op.AddInputTensors(sigmoid_input_tensors)
        sigmoid_op.AddOutputTensors(sigmoid_output_tensors)

        return sigmoid_op
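With this visitor registered, the sigmoid produced by rewriting SiLU (as in the DecomposeSilu sketch near the top) becomes lowerable as well; a brief illustration (hedged: it assumes torch.export is available and that the backend already has an element-wise multiply visitor for the other half of the rewrite):

import torch

# llama2's FFN uses SiLU; once DecomposeSilu rewrites it to
# x * sigmoid(x), the Sigmoid visitor above maps the sigmoid node
# to QNN's OpSigmoid.
ep = torch.export.export(torch.nn.SiLU(), (torch.randn(2, 4),))
print(ep.graph)  # contains aten.silu.default prior to decomposition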