🐛 Describe the bug
I wrote the following code, and I think the tensor dimensions are correct. What should I do?
```python
import torch
import torch.nn as nn
import colossalai.nn as col_nn
from transformers.activations import ACT2FN


class LlamaMLP(nn.Module):
    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        hidden_act: str,
    ):
        super().__init__()
        # self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.gate_proj = col_nn.Linear(hidden_size, intermediate_size, dtype=torch.float, bias=False)
        # self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
        self.down_proj = col_nn.Linear(intermediate_size, hidden_size, dtype=torch.float, bias=False)
        # self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = col_nn.Linear(hidden_size, intermediate_size, dtype=torch.float, bias=False)
        self.act_fn = ACT2FN[hidden_act]

    def forward(self, x):
        up = self.act_fn(self.gate_proj(x)) * self.up_proj(x)
        down = self.down_proj(up)
        return down
```
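For reference, under 1D tensor parallelism a column-parallel linear shards its output dimension across ranks, while a row-parallel linear shards its input dimension, so a column-parallel layer can feed a row-parallel one without gathering in between. Below is a minimal pure-PyTorch sketch of that shape contract, assuming a tensor-parallel size of 2 and LLaMA-7B sizes (hidden 4096, intermediate 11008); these numbers are illustrative assumptions, not taken from the report:

```python
import torch

TP_SIZE = 2
HIDDEN, INTERMEDIATE = 4096, 11008  # assumed LLaMA-7B sizes, for illustration only

# Column-parallel (e.g. gate_proj/up_proj): each rank holds out_features/TP_SIZE
# output rows and consumes the full-width input.
col_weight = torch.randn(INTERMEDIATE // TP_SIZE, HIDDEN)   # [5504, 4096]
x = torch.randn(1, 128, HIDDEN)                             # full hidden states
up_shard = x @ col_weight.t()                               # [1, 128, 5504] per-rank shard

# Row-parallel (e.g. down_proj): each rank holds in_features/TP_SIZE input
# columns, so its input's last dim must already be the matching shard width.
row_weight = torch.randn(HIDDEN, INTERMEDIATE // TP_SIZE)   # [4096, 5504]
down_partial = up_shard @ row_weight.t()                    # [1, 128, 4096] partial sum
# (the real Linear1D_Row all-reduces down_partial across ranks)
assert down_partial.shape[-1] == HIDDEN
```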
The output I get is:
```
Traceback (most recent call last):
  File "train.py", line 158, in <module>
    main()
  File "train.py", line 151, in main
    engine.execute_schedule(data_iter, return_output_label=False)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/engine/_base_engine.py", line 201, in execute_schedule
    output, label, loss = self._schedule.forward_backward_step(self, data_iter, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/engine/schedule/_non_pipeline_schedule.py", line 78, in forward_backward_step
    output = self._call_engine(engine, data)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/engine/schedule/_base_schedule.py", line 109, in _call_engine
    return engine(inputs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/engine/_base_engine.py", line 186, in __call__
    return self.model(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/nas-alinlp/butyuhao/GLM/colossal-ai/llama/modeling_llama.py", line 661, in forward
    layer_outputs = decoder_layer(
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/nas-alinlp/butyuhao/GLM/colossal-ai/llama/modeling_llama.py", line 348, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/nas-alinlp/butyuhao/GLM/colossal-ai/llama/modeling_llama.py", line 218, in forward
    qkv = self.query_key_value(hidden_states)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/colossalai_layer/_utils.py", line 41, in forward
    return self.module(*args)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/colossalai_layer/_utils.py", line 41, in forward
    return self.module(*args)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/colossalai/nn/layer/parallel_1d/layers.py", line 697, in forward
    assert input.shape[-1] == self.weight.shape[-1],
AssertionError: Invalid shapes in Linear1D_Row forward: input=torch.Size([1, 128, 4096]), weight=torch.Size([12288, 2048]). Expected last dim of input 2048.
```

The second tensor-parallel rank fails with the same traceback and AssertionError, after which the launcher exits:

```
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 60198) of binary: /opt/conda/bin/python
```
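Reading the numbers in the assertion (my interpretation, not stated in the report): the failing layer is a row-parallel shard of the fused QKV projection, whose weight shape [12288, 2048] equals [3 × 4096, 4096 / 2], so it expects an input whose last dimension has already been split to 2048, yet it receives the full 4096-wide hidden states. A short check of that arithmetic:

```python
# Decoding the AssertionError shapes (interpretation, not from the report)
tp_size = 2
hidden_size = 4096
qkv_out = 3 * hidden_size              # 12288: fused Q, K, V output rows
row_in_shard = hidden_size // tp_size  # 2048: input width a row-parallel shard expects
assert (qkv_out, row_in_shard) == (12288, 2048)  # matches weight=torch.Size([12288, 2048])
# The layer instead received an input of width 4096, hence the AssertionError.
```

If I recall the legacy 1D layers correctly, `col_nn.Linear` decides between column- and row-parallel placement via a global parallel-input toggle set by the previously constructed `Linear`; constructing `gate_proj`, `down_proj`, `up_proj` in that order can leave the toggle set after `up_proj` and make the next layer's `query_key_value` row-parallel while it still receives unsplit hidden states. Reordering the constructor to `gate_proj`, `up_proj`, `down_proj` may be worth trying; this is a hedged guess, not a confirmed fix.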
Environment
```python
TENSOR_PARALLEL_SIZE = 2
TENSOR_PARALLEL_MODE = '1d'

parallel = dict(
    pipeline=1,
    tensor=dict(mode=TENSOR_PARALLEL_MODE, size=TENSOR_PARALLEL_SIZE),
)

batch_size = 1
```
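To confirm this config actually yields a tensor-parallel group of size 2 at runtime, something like the following sketch should work against the legacy ColossalAI API of this era (the config file name is an assumption):

```python
import colossalai
from colossalai.core import global_context as gpc
from colossalai.context import ParallelMode

# config.py is assumed to contain the `parallel` dict and batch_size above
colossalai.launch_from_torch(config='./config.py')
print('tensor parallel size:', gpc.get_world_size(ParallelMode.TENSOR))  # expect 2
```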