fix hpu attn coding issue
libinta committed Feb 5, 2025
1 parent b97f7c6 commit 593ded0
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/attention/backends/hpu_attn.py
@@ -228,24 +228,26 @@ def forward(
                     self.head_size)
 
         if attn_metadata is None or attn_metadata.block_list is None:
-            attn_bias = attn_metadata.attn_bias
             if not self.prefill_use_fusedsdpa:
                 # TODO: move this outside of model
                 assert attn_metadata.attn_bias is not None, \
                     'attn_bias must be set before calling model.forward'
+                attn_bias = attn_metadata.attn_bias
                 if self.alibi_slopes is not None:
                     position_bias = _make_alibi_bias(
                         self.alibi_slopes, self.num_kv_heads,
                         attn_bias.dtype, attn_bias.shape[-1])
                     attn_bias = attn_bias.tile(
                         (1, self.num_kv_heads, 1, 1))
                     attn_bias.add_(position_bias)
+            else:
+                attn_bias = attn_metadata.attn_bias
 
             out = ops.prompt_attention(
                 query.view(query_shape),
                 key.view(kv_shape),
                 value.view(kv_shape),
-                attn_bias=attn_metadata.attn_bias,
+                attn_bias=attn_bias,
                 p=0.0,
                 scale=self.scale,
                 matmul_qk_op=self.matmul_qk,
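The net effect of the change is that ops.prompt_attention now receives the locally prepared attn_bias (tiled across the KV heads and combined with the ALiBi position bias when self.alibi_slopes is set) instead of the untouched attn_metadata.attn_bias. The tensor handling in that ALiBi branch can be sketched in isolation; the shapes and the fake_alibi_bias stand-in below are illustrative assumptions, not code taken from hpu_attn.py:

import torch

# Illustrative stand-in for _make_alibi_bias (assumed output shape
# [num_kv_heads, 1, seq_len], broadcastable over the query dimension).
def fake_alibi_bias(num_kv_heads, dtype, seq_len):
    slopes = torch.arange(1, num_kv_heads + 1, dtype=dtype).view(num_kv_heads, 1, 1)
    positions = torch.arange(seq_len, dtype=dtype).view(1, 1, seq_len)
    return slopes * -positions

batch, num_kv_heads, seq_len = 2, 4, 8

# Analogue of attn_metadata.attn_bias: one mask shared across all heads.
attn_bias = torch.zeros(batch, 1, seq_len, seq_len)

position_bias = fake_alibi_bias(num_kv_heads, attn_bias.dtype, seq_len)
attn_bias = attn_bias.tile((1, num_kv_heads, 1, 1))  # [batch, num_kv_heads, seq, seq]
attn_bias.add_(position_bias)  # in-place add, broadcasts over the query axis

# With the fix, this per-head tensor is what reaches the kernel
# (attn_bias=attn_bias); previously the raw shared mask was passed.
print(attn_bias.shape)  # torch.Size([2, 4, 8, 8])

Running the sketch prints torch.Size([2, 4, 8, 8]), i.e. one bias slice per KV head, which is the analogue of the tensor the corrected call now forwards.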
