diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS index ff4256d19f..8456c50f6c 100644 --- a/backends/cadence/aot/TARGETS +++ b/backends/cadence/aot/TARGETS @@ -43,6 +43,7 @@ python_library( "//executorch/backends/transforms:decompose_sdpa", "//executorch/backends/transforms:remove_clone_ops", "//executorch/exir:lib", + "//executorch/devtools:lib", ], ) diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py index 89467dbc23..62bae4b21d 100644 --- a/backends/cadence/aot/compiler.py +++ b/backends/cadence/aot/compiler.py @@ -7,6 +7,7 @@ # pyre-strict import logging +from pathlib import Path from typing import Optional import torch @@ -29,7 +30,13 @@ DecomposeScaledDotProductAttention, ) from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform -from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge +from executorch.devtools import generate_etrecord +from executorch.exir import ( + EdgeCompileConfig, + EdgeProgramManager, + ExecutorchProgramManager, + to_edge, +) from torch.ao.quantization.pt2e.export_utils import model_is_exported from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e @@ -197,11 +204,12 @@ def export_to_edge( # Export the model and lower it to an EdgeProgramManager (in edge IR), and # apply passes specific to Cadence DSP execution. Return both to print the # differences. 
+    # Get executorch program after Cadence-specific passes
after Cadence specific passes - exec_prog: ExecutorchProgramManager = cadence_prog_manager.to_executorch() + exec_prog: ExecutorchProgramManager = export_to_cadence_edge_executorch( + quantized_model, example_inputs, working_dir + ) logging.info("Final exported graph:\n") exec_prog.exported_program().graph_module.graph.print_tabular() diff --git a/backends/cadence/build_cadence_runner.sh b/backends/cadence/build_cadence_runner.sh index 7a784a6bf6..88687c5f75 100755 --- a/backends/cadence/build_cadence_runner.sh +++ b/backends/cadence/build_cadence_runner.sh @@ -25,6 +25,7 @@ main() { -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_DEVTOOLS=ON \ -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ -Bcmake-out . cmake --build cmake-out --target install --config Release -j16 @@ -35,6 +36,7 @@ main() { cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \ -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_CADENCE_CPU_RUNNER=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ -B"${build_dir}" \ "${example_dir}" cmake --build "${build_dir}" --config Release -j16 diff --git a/backends/cadence/reference/operators/CMakeLists.txt b/backends/cadence/reference/operators/CMakeLists.txt index 5feafdf6e3..6cd4c870b6 100644 --- a/backends/cadence/reference/operators/CMakeLists.txt +++ b/backends/cadence/reference/operators/CMakeLists.txt @@ -27,6 +27,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/index_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/kernel_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp" diff --git a/backends/cadence/runtime/executor.py b/backends/cadence/runtime/executor.py index d07b1b6a52..7aea3fde0d 100644 --- 
a/backends/cadence/runtime/executor.py +++ b/backends/cadence/runtime/executor.py @@ -123,6 +123,7 @@ def __call__(self) -> None: ), "etdump_path": os.path.join(self.working_dir, "etdump.etdp"), "debug_output_path": os.path.join(self.working_dir, "debug_output.bin"), + "dump_outputs": "true", } args = self.get_bash_command(self.execute_runner, cmd_args) logging.info(f"\33[33m{' '.join(args)}\33[0m")