tenstorrent · nardoTT · Feb 8, 2025 · Feb 8, 2025 · Feb 8, 2025 · Feb 10, 2025
@@ -10,7 +10,7 @@
 import ttnn
 
 from tests.ttnn.utils_for_testing import assert_with_pcc, check_with_pcc_without_tensor_printout
-from models.utility_functions import is_grayskull, is_blackhole, torch_random, skip_for_grayskull
+from models.utility_functions import is_grayskull, is_blackhole, torch_random, skip_for_grayskull, skip_for_wormhole_b0
 
 
 @pytest.mark.parametrize("height", [32, 30])
@@ -339,3 +339,43 @@ def test_untilize_w4(shape, input_layout, output_layout, device):
     output_tensor = ttnn.to_torch(output_tensor)
 
     assert_with_pcc(input_a[:, :, :1, :10912], output_tensor)
+
+
+@skip_for_wormhole_b0()  # NOTE(review): presumably skips this test on Wormhole B0; the reason is not stated here - confirm
+def test_shard_untilize(device):  # Converts a height-sharded TILE_LAYOUT tensor to ROW_MAJOR_LAYOUT in DRAM and compares it with the torch source.
+    torch.manual_seed(2005)  # fixed seed so the torch.rand input below is reproducible
+
+    torch_tensor = torch.rand(1, 1, 29640, 128, dtype=torch.bfloat16)  # 29640 rows is not a multiple of the 480-row shard height (61 * 480 + 360)
+
+    sharded_memory_config = ttnn.create_sharded_memory_config(
+        [  # shard shape; presumably [height, width] given use_height_and_width_as_shard_shape=True - confirm
+            480,
+            128,
+        ],
+        core_grid=ttnn.CoreRangeSet(
+            {
+                ttnn.CoreRange(  # corners (0, 0) and (7, 6): 8 x 7 = 56 cores if both corners are inclusive - TODO confirm
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(7, 6),
+                ),
+                ttnn.CoreRange(  # corners (0, 7) and (5, 7): 6 more cores under the same assumption, 62 in total (62 * 480 = 29760 >= 29640)
+                    ttnn.CoreCoord(0, 7),
+                    ttnn.CoreCoord(5, 7),
+                ),
+            }
+        ),
+        strategy=ttnn.ShardStrategy.HEIGHT,
+        use_height_and_width_as_shard_shape=True,
+    )
+
+    input_tensor = ttnn.from_torch(  # place the torch tensor on the device in TILE_LAYOUT using the sharded memory config
+        torch_tensor, layout=ttnn.TILE_LAYOUT, device=device, memory_config=sharded_memory_config
+    )
+
+    output_tensor = ttnn.to_layout(input_tensor, layout=ttnn.ROW_MAJOR_LAYOUT, memory_config=ttnn.DRAM_MEMORY_CONFIG)  # operation under test: an explicit DRAM memory_config is requested for the output
+    print(f"output_tensor.memory_config()={output_tensor.memory_config()}")  # NOTE(review): debug output; the assert below already reports a mismatch
+    assert output_tensor.memory_config() == ttnn.DRAM_MEMORY_CONFIG, "Memory config is not DRAM"  # the requested memory_config must be the one the output ends up with
+
+    output_tensor = ttnn.to_torch(output_tensor)  # rebinds the name: from here on output_tensor is the torch result
+    assert torch_tensor.shape == output_tensor.shape  # shape must match the original torch tensor exactly
+    assert_with_pcc(torch_tensor, output_tensor, 0.9999)  # values compared against the source with a 0.9999 PCC threshold
@@ -93,8 +93,16 @@ Tensor to_layout_impl(
     auto tensor = tensor_arg;
     const auto tile = tensor.get_tensor_spec().tile();
     auto output_shape = tensor_arg.get_logical_shape();
+
     auto output_memory_config =
         memory_config.value_or(ttnn::get_memory_config(tensor).value_or(ttnn::DRAM_MEMORY_CONFIG));
+    if (memory_config.has_value() && tensor.is_sharded()) {
+        output_memory_config = memory_config.value();
+        if ((output_memory_config == ttnn::DRAM_MEMORY_CONFIG && ttnn::get_memory_config(tensor)->is_l1()) ||
+            (output_memory_config == ttnn::L1_MEMORY_CONFIG && ttnn::get_memory_config(tensor)->is_dram())) {
+            tensor = ttnn::to_memory_config(tensor, output_memory_config);
+        }
+    }
 
     TensorSpec tile_spec(
         tensor_arg.get_logical_shape(),
@@ -141,17 +149,11 @@ Tensor to_layout_impl(
                 !dtype.has_value() || dtype.value() == tensor_arg.dtype(),
                 "dtype cannot be different from tensor dtype when converting to ROW_MAJOR_LAYOUT on device!");
 
-            if (tensor.is_sharded()) {
-                const auto memory_config = tensor.memory_config();
-                output_memory_config =
-                    tt::tt_metal::MemoryConfig{memory_config.memory_layout, memory_config.buffer_type};
-            }
             Shape output_tensor_end(SmallVector<uint32_t>(tensor.logical_shape().rank(), 0));
             int logical_rank = tensor.get_logical_shape().rank();
             for (int index = -1; index >= -logical_rank; --index) {
                 output_tensor_end[index] = tensor.get_logical_shape()[index] - 1;
             }
-
             tensor =
                 ttnn::untilize_with_unpadding(tensor, output_tensor_end, output_memory_config, use_multicore_untilize);
             return ttnn::reshape(tensor, ttnn::Shape{output_shape});