Hi, I am using Google Colab with a T4 GPU. When I run the command "mistral-demo $M7B_DIR", I get the error below. Is there any solution for this?
Traceback (most recent call last):
File "/usr/local/bin/mistral-demo", line 8, in
sys.exit(mistral_demo())
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/main.py", line 183, in mistral_demo
fire.Fire(demo)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 143, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 477, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 693, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/main.py", line 157, in demo
generated_tokens, _logprobs = generate(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/generate.py", line 49, in generate
prelogits = model.forward(
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/model.py", line 314, in forward
h = self.forward_partial(input_ids, seqlens, cache=cache)
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/model.py", line 296, in forward_partial
h = layer(h, freqs_cis, cache_view)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/model.py", line 188, in forward
r = self.attention.forward(self.attention_norm(x), freqs_cis, cache)
File "/usr/local/lib/python3.10/dist-packages/mistral_inference/model.py", line 128, in forward
output = memory_efficient_attention(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/init.py", line 268, in memory_efficient_attention
return _memory_efficient_attention(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/init.py", line 387, in _memory_efficient_attention
return _memory_efficient_attention_forward(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/init.py", line 403, in _memory_efficient_attention_forward
op = _dispatch_fw(inp, False)
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/dispatch.py", line 125, in _dispatch_fw
return _run_priority_list(
File "/usr/local/lib/python3.10/dist-packages/xformers/ops/fmha/dispatch.py", line 65, in _run_priority_list
raise NotImplementedError(msg)
NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
     query     : shape=(1, 28, 32, 128) (torch.bfloat16)
     key       : shape=(1, 28, 32, 128) (torch.bfloat16)
     value     : shape=(1, 28, 32, 128) (torch.bfloat16)
     attn_bias : <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalLocalAttentionMask'>
     p         : 0.0
`decoderF` is not supported because:
    attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalLocalAttentionMask'>
    bf16 is only supported on A100+ GPUs
`[email protected]` is not supported because:
    requires device with capability > (8, 0) but your GPU has capability (7, 5) (too old)
    bf16 is only supported on A100+ GPUs
`cutlassF` is not supported because:
    bf16 is only supported on A100+ GPUs
`smallkF` is not supported because:
    max(query.shape[-1] != value.shape[-1]) > 32
    dtype=torch.bfloat16 (supported: {torch.float32})
    attn_bias type is <class 'xformers.ops.fmha.attn_bias.BlockDiagonalCausalLocalAttentionMask'>
    bf16 is only supported on A100+ GPUs
    unsupported embed per head: 128
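For context, every xformers attention backend in the message above rejects bfloat16 on a T4, whose compute capability is (7, 5), below the (8, 0) that the bf16 kernels require. Below is a minimal sketch to confirm this on the Colab runtime. The capability checks are standard PyTorch calls; the float16 loading step is an assumption, since whether `Transformer.from_folder` accepts a `dtype` argument depends on the installed `mistral_inference` version.

```python
import os
import torch
from mistral_inference.model import Transformer

# A T4 reports compute capability (7, 5); the bf16 attention kernels in
# xformers require (8, 0) or newer, i.e. A100-class hardware.
print(torch.cuda.get_device_name(0))        # e.g. "Tesla T4"
print(torch.cuda.get_device_capability(0))  # (7, 5) on a T4

# Hypothetical workaround (assumption, not verified): load the weights in
# float16, which the T4 supports natively, instead of the default bfloat16.
# Whether from_folder exposes a dtype argument depends on the installed
# mistral_inference version.
model = Transformer.from_folder(os.environ["M7B_DIR"], dtype=torch.float16)
```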