Skip to content

Commit

Permalink
Qualcomm AI Engine Direct - LLAMA2 Infrastructure (pytorch#2020)
Browse files Browse the repository at this point in the history
Summary:
1. OPs
- Add pow_tensor_scalar op
- Add rsqrt op
- Add sigmoid op
- Refine axis handling of cat op
- Refine parameters related functions
2. Passes
- Add AnnotateDecomposed for unbind and stack ops
- Add DecomposeSilu for quantizer
- Add ReplaceInfBuffer for quantizer
- Change pass name ConvertAddmmmmWithLinear to ConvertToLinear
- Change pass name ConvertScaledDotProductAttention to DecomposeScaledDotProductAttention
- Support more args for sdpa op in DecomposeScaledDotProductAttention
- Support mm case for ConvertToLinear
- Move q_ops and dq_ops to pass/utils.py
3. Tests
- Add dummy llama2 test script
- Add single op test cases
4. Others
- Fix error of popping missing buffer
- Reorder test models
- Reorder ops in qnn_constant

Pull Request resolved: pytorch#2020

Reviewed By: kirklandsign

Differential Revision: D54010593

Pulled By: cccclai

fbshipit-source-id: 657994dc223cb9bd88a263bfc2479295384fcb4d
  • Loading branch information
Joey Tsai authored and facebook-github-bot committed Feb 22, 2024
1 parent 8fed60b commit f707590
Show file tree
Hide file tree
Showing 28 changed files with 1,163 additions and 479 deletions.
6 changes: 6 additions & 0 deletions backends/qualcomm/builders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@
op_mean_dim,
op_mul,
op_pad,
op_pow,
op_quantize,
op_relu,
op_reshape,
op_rsqrt,
op_select_copy,
op_sigmoid,
op_skip_ops,
op_slice_copy,
op_softmax,
Expand Down Expand Up @@ -70,10 +73,13 @@
op_mean_dim,
op_mul,
op_pad,
op_pow,
op_quantize,
op_relu,
op_reshape,
op_rsqrt,
op_select_copy,
op_sigmoid,
op_skip_ops,
op_slice_copy,
op_softmax,
Expand Down
9 changes: 8 additions & 1 deletion backends/qualcomm/builders/op_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,14 @@ def define_node(
nodes_to_wrappers,
)

axis = cast(int, node.args[1])
# node args[1] might not exist
axis = 0
if len(node.args) == 2:
axis = cast(int, node.args[1])

if axis < 0:
axis += node.meta["val"].dim()

if "axis_order" in node.meta:
axis = node.meta["axis_order"].index(axis)

Expand Down
92 changes: 92 additions & 0 deletions backends/qualcomm/builders/op_pow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch
from executorch.exir.dialects._ops import ops as exir_ops

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWisePower, QNN_OP_PACKAGE_NAME_QTI_AISW


# TODO: add more visitor classes (e.g. PowTensorTensor) if needed
@register_node_visitor
class PowTensorScalar(NodeVisitor):
    """Lowers ``aten.pow.Tensor_Scalar`` to the QNN ElementWisePower op.

    QNN's elementwise power takes two tensor inputs, so the Python scalar
    exponent is materialized as a static constant tensor with the same
    shape as the base tensor.
    """

    target = "aten.pow.Tensor_Scalar"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Build and return the PyQnnOpWrapper for this pow node.

        Args:
            node: the FX node for aten.pow.Tensor_Scalar; args[0] is the
                base tensor node, args[1] the scalar exponent.
            nodes_to_wrappers: running map from FX nodes to already-defined
                QNN tensor wrappers, updated by define_tensor.
        """
        # Output tensor of the pow op.
        out_tensor = self.get_tensor(node, node)
        output_tensor_wrapper = self.define_tensor(
            node,
            out_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )
        pow_output_tensors = [output_tensor_wrapper]

        # Tensor input (the base of the power).
        input_node = node.args[0]
        input_tensor = self.get_tensor(input_node, node)

        tensor_type = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE

        input_tensor_wrapper = self.define_tensor(
            input_node,
            input_tensor,
            tensor_type,
            nodes_to_wrappers,
        )

        # Scalar input (the exponent): QNN needs a tensor, so broadcast the
        # scalar to the base tensor's shape as a float32 constant.
        scalar = node.args[1]
        scalar_tensor = torch.full(input_tensor.size(), scalar).to(torch.float32)

        # Free-standing placeholder node (not inserted into the graph) whose
        # only purpose is to carry a name and quant metadata into
        # define_tensor below.
        # torch.fx.Node positional args: 'graph', 'name', 'op', 'target',
        # 'args', and 'kwargs'
        scalar_node = torch.fx.Node(
            node.graph,
            node.name + "_runtime_scalar",
            "call_function",
            exir_ops.edge.aten.full.default,
            (),  # args
            {},  # kwargs
        )

        if pow_quant_attrs := node.meta.get("quant_attrs"):
            quant_attrs = pow_quant_attrs.copy()
            quant_range = quant_attrs["quant_max"] - quant_attrs["quant_min"]
            # NOTE(review): scale/zero_point appear chosen so the constant
            # scalar lands exactly on one end of the quantized range
            # (q == quant_max for scalar >= 0, q == quant_min otherwise);
            # assumes affine quantization — confirm against the quantizer.
            quant_attrs["zero_point"] = 0 if scalar >= 0 else quant_attrs["quant_max"]
            quant_attrs["scale"] = (
                scalar / quant_range if scalar >= 0 else -scalar / quant_range
            )
            scalar_node.meta["quant_attrs"] = quant_attrs

        # The exponent is baked into the graph as static data.
        scalar_tensor_wrapper = self.define_tensor(
            scalar_node,
            scalar_tensor,
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
            nodes_to_wrappers,
        )

        pow_input_tensors = [input_tensor_wrapper, scalar_tensor_wrapper]

        pow_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWisePower.op_name,
        )
        pow_op.AddInputTensors(pow_input_tensors)
        pow_op.AddOutputTensors(pow_output_tensors)

        return pow_op
55 changes: 55 additions & 0 deletions backends/qualcomm/builders/op_rsqrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWiseRsqrt, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Rsqrt(NodeVisitor):
    """Lowers ``aten.rsqrt.default`` to the QNN ElementWiseRsqrt op."""

    target = "aten.rsqrt.default"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Wrap rsqrt's single input and output, then emit the QNN op."""
        # Input tensor wrapper (args[0] is the sole operand).
        src_node = node.args[0]
        src_wrapper = self.define_tensor(
            src_node,
            self.get_tensor(src_node, node),
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )

        # Output tensor wrapper for the node itself.
        dst_wrapper = self.define_tensor(
            node,
            self.get_tensor(node, node),
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )

        qnn_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWiseRsqrt.op_name,
        )
        qnn_op.AddInputTensors([src_wrapper])
        qnn_op.AddOutputTensors([dst_wrapper])

        return qnn_op
55 changes: 55 additions & 0 deletions backends/qualcomm/builders/op_sigmoid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpSigmoid, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Sigmoid(NodeVisitor):
    """Lowers ``aten.sigmoid.default`` to the QNN Sigmoid op."""

    target = "aten.sigmoid.default"

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Wrap sigmoid's single input and output, then emit the QNN op."""
        # Input tensor wrapper (args[0] is the sole operand).
        src_node = node.args[0]
        src_wrapper = self.define_tensor(
            src_node,
            self.get_tensor(src_node, node),
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )

        # Output tensor wrapper for the node itself.
        dst_wrapper = self.define_tensor(
            node,
            self.get_tensor(node, node),
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
            nodes_to_wrappers,
        )

        qnn_op = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpSigmoid.op_name,
        )
        qnn_op.AddInputTensors([src_wrapper])
        qnn_op.AddOutputTensors([dst_wrapper])

        return qnn_op
Loading

0 comments on commit f707590

Please sign in to comment.