Skip to content

Commit

Permalink
[GPU] Fix 4d fc for onednn
Browse files Browse the repository at this point in the history
  • Loading branch information
steve-y committed Sep 9, 2024
1 parent 102e875 commit 4586c52
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/plugins/intel_gpu/src/graph/fully_connected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
output_type = impl_param.get_output_element_type();
}

const auto supports_immad = node.get_program().get_engine().get_device_info().supports_immad;

auto reshape_to_2d = [](const ov::PartialShape& shape, int64_t feature) {
auto staticShape = shape.to_shape();
size_t total = std::accumulate(staticShape.begin(), staticShape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
Expand All @@ -117,7 +119,7 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)});
}

if (desc->input_size > 4) {
if (desc->input_size > 3 || (!supports_immad && desc->input_size > 4)) {
input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature));
}
if (weights_pshape.size() != 2) {
Expand All @@ -127,7 +129,7 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
auto output_size = tensor(input_layout.batch(), weights_layout.batch(), 1, 1);
if (desc->input_size == 3) {
output_size = tensor(input_layout.batch(), input_layout.feature(), 1, weights_layout.batch());
} else if (desc->input_size == 4) {
} else if (!supports_immad && desc->input_size == 4) {
output_size = tensor(input_layout.batch(), input_layout.feature(), weights_layout.batch(), input_layout.spatial(1));
}
format output_format = get_preferred_format(node, impl_param);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,53 @@ TEST(fully_connected_gpu, no_biases_4d_input) {
ASSERT_EQ(outputs.begin()->second.get_layout().spatial(0), weight_b);
}

// Verifies that, on a device reporting immad support, a fully connected
// primitive fed with a 4D input is backed by a oneDNN implementation and
// produces an output whose batch equals input_f * input_y, whose feature
// equals the weights batch, and whose two spatial dimensions are both 1.
TEST(fully_connected_gpu, no_biases_4d_input_immad) {
    auto& engine = get_test_engine();
    // Nothing to check on devices without immad support; the test body is
    // simply left early in that case.
    if (!engine.get_device_info().supports_immad)
        return;

    // Input  : 1x256x256x384
    // Weights: 1536x384x1x1
    // Output : 65536x1536x1x1

    // Dimensions of the whole input buffer.
    const int32_t input_b = 1;
    const int32_t input_f = 256;
    const int32_t input_y = 256;
    const int32_t input_x = 384;
    // Dimensions of the whole weights buffer.
    const int32_t weight_b = 1536;
    const int32_t weight_f = 384;
    const int32_t weight_y = 1;
    const int32_t weight_x = 1;

    auto input_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { input_b, input_f, input_x, input_y } });
    auto weights_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { weight_b, weight_f, weight_x, weight_y } });

    // Both buffers are zero-filled: only the resulting layout is inspected,
    // never the computed values.
    const auto input_count = input_b * input_f * input_y * input_x;
    const auto weights_count = weight_b * weight_f * weight_y * weight_x;
    std::vector<float> input_zeros(input_count, 0.0f);
    std::vector<float> weights_zeros(weights_count, 0.0f);
    set_values(input_prim, std::move(input_zeros));
    set_values(weights_prim, std::move(weights_zeros));

    // input -> fc_prim, with the weights supplied as constant data and no bias.
    // NOTE(review): the trailing 4 and 2 are presumably the input rank and the
    // weights rank -- confirm against the fully_connected constructor.
    topology topo;
    topo.add(input_layout("input", input_prim->get_layout()));
    topo.add(data("weights", weights_prim));
    topo.add(fully_connected("fc_prim", input_info("input"), "weights", "", 4, 2));

    // No implementation is forced for "fc_prim"; the default selection is used
    // and the assertion below requires it to be a oneDNN implementation.
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));

    network net(engine, topo, config);
    net.set_input_data("input", input_prim);

    auto selected_impl = net.get_primitive("fc_prim")->get_impl();
    ASSERT_TRUE(selected_impl != nullptr);
    ASSERT_TRUE(selected_impl->is_onednn());

    auto outputs = net.execute();
    const auto out_layout = outputs.begin()->second.get_layout();
    ASSERT_EQ(out_layout.batch(), input_f*input_y);
    ASSERT_EQ(out_layout.feature(), weight_b);
    ASSERT_EQ(out_layout.spatial(1), weight_y);
    ASSERT_EQ(out_layout.spatial(0), weight_x);
}

TEST(fully_connected_gpu, xb_f32_batch_1) {
// Input : 3x1
// Output : 4x1
Expand Down

0 comments on commit 4586c52

Please sign in to comment.