From 030fc3f8190c7e577f90d3af73688e9e9e9af191 Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Thu, 5 Sep 2024 23:12:35 -0500 Subject: [PATCH] [LLAVA] Enable 2nd XNNPACK Partition pass for the text model Differential Revision: D62279641 Pull Request resolved: https://github.com/pytorch/executorch/pull/4968 --- examples/models/llava/export_llava.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py index 4f8a403bb3..a41d8d3ba0 100644 --- a/examples/models/llava/export_llava.py +++ b/examples/models/llava/export_llava.py @@ -211,10 +211,15 @@ def export_all(llava_model: LlavaModel): partitioner={ "image_encoder": [XnnpackPartitioner()], "text_model": [ + # First partition the DQLinear nodes, then partition the rest of the nodes. + # This keeps multiple DQLinear nodes out of the same partition, + # which avoids holding multiple unpacked and packed weight buffers in memory + # at the same time and so reduces peak memory footprint. XnnpackPartitioner( config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, per_op_mode=True, - ) + ), + XnnpackPartitioner(), ], }, compile_config=EdgeCompileConfig(_check_ir_validity=False),