Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix to_layout sharded bug #17820

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion tests/ttnn/unit_tests/test_to_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ttnn

from tests.ttnn.utils_for_testing import assert_with_pcc, check_with_pcc_without_tensor_printout
from models.utility_functions import is_grayskull, is_blackhole, torch_random, skip_for_grayskull
from models.utility_functions import is_grayskull, is_blackhole, torch_random, skip_for_grayskull, skip_for_wormhole_b0


@pytest.mark.parametrize("height", [32, 30])
Expand Down Expand Up @@ -339,3 +339,43 @@ def test_untilize_w4(shape, input_layout, output_layout, device):
output_tensor = ttnn.to_torch(output_tensor)

assert_with_pcc(input_a[:, :, :1, :10912], output_tensor)


@skip_for_wormhole_b0()
def test_shard_untilize(device):
    """Convert a height-sharded tiled tensor to row-major with an explicit DRAM config.

    A random bfloat16 tensor of shape (1, 1, 29640, 128) is placed on the
    device in TILE layout using a height-sharded memory config with a
    [480, 128] shard shape spread over two core ranges. It is then converted
    to ROW_MAJOR layout while requesting ``ttnn.DRAM_MEMORY_CONFIG``.

    The test checks that the result reports the requested DRAM memory config,
    and that the data read back to torch keeps the original shape and matches
    the reference with a PCC threshold of 0.9999.
    """
    torch.manual_seed(2005)
    reference = torch.rand(1, 1, 29640, 128, dtype=torch.bfloat16)

    # Shard shape is given as [height, width] because
    # use_height_and_width_as_shard_shape=True is passed below.
    shard_shape = [480, 128]
    first_range = ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 6))
    second_range = ttnn.CoreRange(ttnn.CoreCoord(0, 7), ttnn.CoreCoord(5, 7))
    shard_cores = ttnn.CoreRangeSet({first_range, second_range})

    sharded_config = ttnn.create_sharded_memory_config(
        shard_shape,
        core_grid=shard_cores,
        strategy=ttnn.ShardStrategy.HEIGHT,
        use_height_and_width_as_shard_shape=True,
    )

    tiled_on_device = ttnn.from_torch(
        reference,
        layout=ttnn.TILE_LAYOUT,
        device=device,
        memory_config=sharded_config,
    )

    # The explicitly requested memory config must be honoured even though the
    # input tensor is sharded.
    row_major = ttnn.to_layout(
        tiled_on_device,
        layout=ttnn.ROW_MAJOR_LAYOUT,
        memory_config=ttnn.DRAM_MEMORY_CONFIG,
    )
    actual_config = row_major.memory_config()
    print(f"output_tensor.memory_config()={actual_config}")
    assert actual_config == ttnn.DRAM_MEMORY_CONFIG, "Memory config is not DRAM"

    round_tripped = ttnn.to_torch(row_major)
    assert reference.shape == round_tripped.shape
    assert_with_pcc(reference, round_tripped, 0.9999)
14 changes: 8 additions & 6 deletions ttnn/cpp/ttnn/operations/core/to_layout/to_layout_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,16 @@ Tensor to_layout_impl(
auto tensor = tensor_arg;
const auto tile = tensor.get_tensor_spec().tile();
auto output_shape = tensor_arg.get_logical_shape();

auto output_memory_config =
memory_config.value_or(ttnn::get_memory_config(tensor).value_or(ttnn::DRAM_MEMORY_CONFIG));
if (memory_config.has_value() && tensor.is_sharded()) {
output_memory_config = memory_config.value();
if ((output_memory_config == ttnn::DRAM_MEMORY_CONFIG && ttnn::get_memory_config(tensor)->is_l1()) ||
(output_memory_config == ttnn::L1_MEMORY_CONFIG && ttnn::get_memory_config(tensor)->is_dram())) {
tensor = ttnn::to_memory_config(tensor, output_memory_config);
}
}

TensorSpec tile_spec(
tensor_arg.get_logical_shape(),
Expand Down Expand Up @@ -141,17 +149,11 @@ Tensor to_layout_impl(
!dtype.has_value() || dtype.value() == tensor_arg.dtype(),
"dtype cannot be different from tensor dtype when converting to ROW_MAJOR_LAYOUT on device!");

if (tensor.is_sharded()) {
const auto memory_config = tensor.memory_config();
output_memory_config =
tt::tt_metal::MemoryConfig{memory_config.memory_layout, memory_config.buffer_type};
}
Shape output_tensor_end(SmallVector<uint32_t>(tensor.logical_shape().rank(), 0));
int logical_rank = tensor.get_logical_shape().rank();
for (int index = -1; index >= -logical_rank; --index) {
output_tensor_end[index] = tensor.get_logical_shape()[index] - 1;
}

tensor =
ttnn::untilize_with_unpadding(tensor, output_tensor_end, output_memory_config, use_multicore_untilize);
return ttnn::reshape(tensor, ttnn::Shape{output_shape});
Expand Down
Loading