Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
[ghstack-poisoned]
  • Loading branch information
vkuzo committed Feb 14, 2025
2 parents 3fd4cfc + 2f0d4e3 commit e397c47
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions torchao/_models/llama/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,9 @@ def ffn_or_attn_only(mod, fqn):
else:
quantize_(model, int8_dynamic_activation_int8_weight())
if "int4wo" in quantization:
+ use_hqq = False
if "hqq" in quantization:
use_hqq = True
- else:
- use_hqq = False
group_size = int(quantization.split("-")[1])
assert (
group_size
Expand All @@ -434,7 +433,7 @@ def ffn_or_attn_only(mod, fqn):
256,
]
), f"int4wo group_size needs to be one of [32,64,128,256] but got {group_size}"
- quantize_(model, int4_weight_only(group_size=group_size))
+ quantize_(model, int4_weight_only(group_size=group_size, use_hqq=use_hqq))
elif "int8adq-int4w-symm" in quantization:
from torchao.dtypes import CutlassInt4PackedLayout

Expand Down

0 comments on commit e397c47

Please sign in to comment.