Skip to content

Commit

Permalink
[Snippets][CPU] United LoopStatic and LoopDynamic into one node
Browse files Browse the repository at this point in the history
[Snippets] Applied Vladislav comment

[Snippets] Added quick return in jit_loop_begin_emitter emission

[Snippets] Applied last comments of Vladislav

[Snippets] Fixed ptr_increment initialization for dynamic cases
  • Loading branch information
a-sidorova committed May 27, 2024
1 parent af88a20 commit b0b4201
Show file tree
Hide file tree
Showing 25 changed files with 377 additions and 770 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "pass.hpp"

#include "snippets/utils.hpp"

namespace ov {
namespace snippets {
namespace lowered {
Expand Down Expand Up @@ -46,6 +48,10 @@ class IdentifyBuffers: public RangedPass {
int64_t ptr_increment = 0;
int64_t finalization_offset = 0;

// Tells whether both shift parameters hold known values: the result is true only when
// utils::is_dynamic_value() reports neither `ptr_increment` nor `finalization_offset` as dynamic.
inline bool is_static() const {
    const bool has_dynamic_param =
        utils::is_dynamic_value(ptr_increment) || utils::is_dynamic_value(finalization_offset);
    return !has_dynamic_param;
}

friend bool operator==(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs);
friend bool operator!=(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class InsertLoops : public RangedPass {
bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override;
private:
static void insertion(LinearIR& linear_ir, const LoopManagerPtr& loop_manager, size_t loop_id);
static bool is_loop_dynamic(const UnifiedLoopInfoPtr& loop_info);
};

} // namespace pass
Expand Down
71 changes: 15 additions & 56 deletions src/common/snippets/include/snippets/op/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,13 @@ class LoopBegin : public LoopBase {
LoopBegin();

void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
std::shared_ptr<LoopEnd> get_loop_end() const;

protected:
void validate_and_infer_types_except_LoopEnd();
};

/**
 * @interface LoopBeginStatic
 * @brief LoopBegin subclass tagged via OPENVINO_OP as "LoopBeginStatic" in "SnippetsOpset" (parent: LoopBegin).
 *        It declares no data members of its own and only overrides clone_with_new_inputs().
 */
class LoopBeginStatic : public LoopBegin {
public:
OPENVINO_OP("LoopBeginStatic", "SnippetsOpset", LoopBegin);
LoopBeginStatic() = default;
// Override of the Node cloning hook; the definition is not part of this header.
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
};

/**
 * @interface LoopBeginDynamic
 * @brief LoopBegin subclass tagged via OPENVINO_OP as "LoopBeginDynamic" in "SnippetsOpset" (parent: LoopBegin).
 *        It declares no data members of its own and only overrides clone_with_new_inputs().
 */
class LoopBeginDynamic : public LoopBegin {
public:
OPENVINO_OP("LoopBeginDynamic", "SnippetsOpset", LoopBegin);
LoopBeginDynamic() = default;
// Override of the Node cloning hook; the definition is not part of this header.
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
};

/**
* @interface LoopEnd
* @brief Marks the end of the Loop region and defines the loop properties.
Expand All @@ -77,78 +64,50 @@ class LoopEnd : public LoopBase {
public:
OPENVINO_OP("LoopEnd", "SnippetsOpset", LoopBase);
LoopEnd() = default;
LoopEnd(const Output<Node>& loop_begin, size_t work_amount_increment, std::vector<bool> is_incremented,
LoopEnd(const Output<Node>& loop_begin, size_t work_amount, size_t work_amount_increment,
std::vector<bool> is_incremented, std::vector<int64_t> ptr_increments, std::vector<int64_t> finalization_offsets,
std::vector<int64_t> element_type_sizes, size_t input_num, size_t output_num, size_t id);

void validate_and_infer_types() override;
bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

std::shared_ptr<LoopBegin> get_loop_begin();
const std::vector<bool>& get_is_incremented() const;
const std::vector<int64_t>& get_finalization_offsets() const;
const std::vector<int64_t>& get_ptr_increments() const;
const std::vector<int64_t>& get_element_type_sizes() const;
size_t get_work_amount() const;
size_t get_increment() const;
size_t get_id() const;
size_t get_input_num() const;
size_t get_output_num() const;
bool get_evaluate_once() const;
bool has_dynamic_params() const;

void set_is_incremented(std::vector<bool> is_incremented);
void set_finalization_offsets(std::vector<int64_t> offsets);
void set_ptr_increments(std::vector<int64_t> new_ptr_increments);
void set_work_amount(size_t new_work_amount);
void set_increment(size_t new_increment);
void set_evaluate_once(bool once);
void set_id(size_t id);

protected:
std::vector<bool> m_is_incremented = {};
std::vector<int64_t> m_ptr_increments = {};
std::vector<int64_t> m_finalization_offsets = {};
std::vector<int64_t> m_element_type_sizes = {};
size_t m_work_amount = 0;
size_t m_work_amount_increment = 0;
size_t m_input_num = 0;
size_t m_output_num = 0;
size_t m_id = 0; // the corresponding Loop identificator in LoopManager
};

/**
 * @interface LoopEndStatic
 * @brief LoopEnd subclass tagged via OPENVINO_OP as "LoopEndStatic" in "SnippetsOpset" (parent: LoopEnd).
 *        On top of the base class it stores per-port pointer increments, per-port finalization offsets,
 *        the work amount and an "evaluate once" flag (see the protected members below).
 */
class LoopEndStatic : public LoopEnd {
public:
OPENVINO_OP("LoopEndStatic", "SnippetsOpset", LoopEnd);
LoopEndStatic() = default;
// Takes the loop_begin output plus the work amount, its increment, the per-port vectors
// (is_incremented, ptr_increments, finalization_offsets, element_type_sizes), the port counts and the loop id.
LoopEndStatic(const Output<Node>& loop_begin, size_t work_amount, size_t work_amount_increment,
std::vector<bool> is_incremented, std::vector<int64_t> ptr_increments, std::vector<int64_t> finalization_offsets,
std::vector<int64_t> element_type_sizes, size_t input_num, size_t output_num, size_t id);
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

void validate_and_infer_types() override;
bool visit_attributes(AttributeVisitor& visitor) override;

// update_ptr_increments resets non-zero increments to new_increment. It's used when work_amount_increment is
// updated and we need to refresh ptr increments accordingly while respecting the broadcasting pattern
void update_ptr_increments(int64_t new_increment);

// Getters; presumably they expose the matching m_* members declared below (definitions are not shown here).
const std::vector<int64_t>& get_finalization_offsets() const;
const std::vector<int64_t>& get_ptr_increments() const;
size_t get_work_amount() const;
bool get_evaluate_once() const;

// Setters; presumably they overwrite the matching m_* members declared below (definitions are not shown here).
void set_finalization_offsets(std::vector<int64_t> offsets);
void set_ptr_increments(std::vector<int64_t> new_ptr_increments);
void set_work_amount(size_t new_work_amount);
void set_evaluate_once(bool once);

protected:
// Per-port vectors start empty and the scalar fields start at zero/false until set.
std::vector<int64_t> m_ptr_increments = {};
std::vector<int64_t> m_finalization_offsets = {};
size_t m_work_amount = 0;
bool m_evaluate_once = false; // true if the Loop is executed only once, used to skip setting and testing the loop counter
};

/**
 * @interface LoopEndDynamic
 * @brief LoopEnd subclass tagged via OPENVINO_OP as "LoopEndDynamic" in "SnippetsOpset" (parent: LoopEnd).
 *        It declares no data members of its own; its constructor takes no work amount, pointer increments
 *        or finalization offsets.
 */
class LoopEndDynamic : public LoopEnd {
public:
OPENVINO_OP("LoopEndDynamic", "SnippetsOpset", LoopEnd);
LoopEndDynamic() = default;
// Takes the loop_begin output, the work amount increment, the per-port is_incremented flags and
// element type sizes, the port counts and the loop id.
LoopEndDynamic(const Output<Node>& loop_begin, size_t work_amount_increment, std::vector<bool> is_incremented,
std::vector<int64_t> element_type_sizes, size_t input_num, size_t output_num, size_t id);

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
};

} // namespace op
} // namespace snippets
} // namespace ov
22 changes: 8 additions & 14 deletions src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,22 +82,16 @@ bool CleanRepeatedDataPointerShifts::reuse_increments(const LoopManagerPtr& loop
// TODO [133463]: We have to update both LoopEnd and LoopInfo since both entities must stay valid.
// To avoid updating both, we have to insert Loop ops into LinearIR at the end of the pipeline.
auto new_is_incremented = loop_end->get_is_incremented();
if (const auto loop_end_dynamic = ov::as_type_ptr<op::LoopEndDynamic>(loop_end_expr->get_node())) {
for (auto idx_to_drop : resetting_data_indexes) {
new_is_incremented[idx_to_drop] = false;
}
} else if (const auto loop_end_static = ov::as_type_ptr<op::LoopEndStatic>(loop_end_expr->get_node())) {
auto new_ptr_increments = loop_end_static->get_ptr_increments();
auto new_finalization_offsets = loop_end_static->get_finalization_offsets();
for (auto idx_to_drop : resetting_data_indexes) {
new_ptr_increments[idx_to_drop] = 0;
new_finalization_offsets[idx_to_drop] = 0;
new_is_incremented[idx_to_drop] = false;
}
loop_end_static->set_ptr_increments(new_ptr_increments);
loop_end_static->set_finalization_offsets(new_finalization_offsets);
auto new_ptr_increments = loop_end->get_ptr_increments();
auto new_finalization_offsets = loop_end->get_finalization_offsets();
for (auto idx_to_drop : resetting_data_indexes) {
new_is_incremented[idx_to_drop] = false;
new_ptr_increments[idx_to_drop] = 0;
new_finalization_offsets[idx_to_drop] = 0;
}
loop_end->set_is_incremented(new_is_incremented);
loop_end->set_ptr_increments(new_ptr_increments);
loop_end->set_finalization_offsets(new_finalization_offsets);

const auto loop_info = loop_manager->get_loop_info<UnifiedLoopInfo>(loop_end->get_id());
size_t loop_port_idx = 0;
Expand Down
9 changes: 6 additions & 3 deletions src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#include "snippets/lowered/pass/cleanup_loop_offsets.hpp"

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/snippets_isa.hpp"
#include "snippets/op/loop.hpp"
#include "snippets/utils.hpp"
#include "snippets/itt.hpp"

namespace ov {
Expand All @@ -18,7 +19,7 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co
bool is_modified = false;
for (auto expr_it = begin; expr_it != end; expr_it++) {
const auto& node = expr_it->get()->get_node();
if (auto loop_end = as_type_ptr<op::LoopEndStatic>(node)) {
if (auto loop_end = as_type_ptr<op::LoopEnd>(node)) {
auto next_expr_it = std::next(expr_it);
const auto& next_node = next_expr_it->get()->get_node();
// Note: Finalization offsets before the Result can be safely disregarded
Expand All @@ -29,7 +30,7 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co
loop_end->set_finalization_offsets(std::vector<int64_t>(fin_offsets.size(), 0));
is_modified = true;
}
if (auto outer_loop_end = as_type_ptr<op::LoopEndStatic>(next_node)) {
if (auto outer_loop_end = as_type_ptr<op::LoopEnd>(next_node)) {
const auto& is_incremented = loop_end->get_is_incremented();
const auto& data_sizes = loop_end->get_element_type_sizes();
auto fin_offsets = loop_end->get_finalization_offsets();
Expand All @@ -51,6 +52,8 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co
if (found != per_port_connector_offset.end()) {
if (!is_incremented[found->second] || outer_data_sizes[i] != data_sizes[found->second])
continue;
if (utils::is_dynamic_value(outer_ptr_increments[i]) || utils::is_dynamic_value(fin_offsets[found->second]))
continue;
// Since data ptr is incremented on [ptr_increment x increment],
// we should guarantee proportionality of ptr shifts.
// If the data ptr can't be proportionally shifted, the optimization is not applied
Expand Down
46 changes: 31 additions & 15 deletions src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "snippets/lowered/pass/identify_buffers.hpp"
#include "snippets/pass/tokenization.hpp"
#include "snippets/utils.hpp"
#include "snippets/itt.hpp"

namespace ov {
Expand Down Expand Up @@ -46,7 +47,7 @@ size_t DefineBufferClusters::get_cluster_buffer_id(const AllocateBuffers::Buffer
DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const ExpressionPtr& loop_expr) const {
BufferPorts input_buffers;

const auto loop_end = ov::as_type_ptr<op::LoopEndStatic>(loop_expr->get_node());
const auto loop_end = ov::as_type_ptr<op::LoopEnd>(loop_expr->get_node());
const auto in_count = loop_end->get_input_num();
const auto& connectors = loop_expr->get_input_port_connectors();

Expand All @@ -66,7 +67,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const
DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const ExpressionPtr& loop_expr) const {
BufferPorts output_buffers;

const auto loop_end = ov::as_type_ptr<op::LoopEndStatic>(loop_expr->get_node());
const auto loop_end = ov::as_type_ptr<op::LoopEnd>(loop_expr->get_node());
const auto in_count = loop_end->get_input_num();
const auto out_count = loop_end->get_output_num();
const auto& connectors = loop_expr->get_input_port_connectors();
Expand All @@ -85,7 +86,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const

void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) {
const auto& expr = *expr_it;
const auto loop_end = ov::as_type_ptr<op::LoopEndStatic>(expr->get_node());
const auto loop_end = ov::as_type_ptr<op::LoopEnd>(expr->get_node());
const auto& ptr_increments = loop_end->get_ptr_increments();
const auto& final_offsets = loop_end->get_finalization_offsets();
const auto& data_sizes = loop_end->get_element_type_sizes();
Expand All @@ -110,19 +111,30 @@ void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) {
continue;

const auto input_buffer = ov::as_type_ptr<op::Buffer>(input_buffer_expr->get_node());

// If the allocated sizes of the buffers are unknown at the compilation stage (dynamic),
// we cannot be sure that they will be the same at runtime.
if ((utils::is_dynamic_value(input_buffer->get_byte_size()) || utils::is_dynamic_value(output_buffer->get_byte_size())))
continue;

// Memory can be reused if reading and writing are executed proportionally:
// - the same reading/writing order
// - the same buffer memory sizes
if ((input_buffer->get_byte_size() != output_buffer->get_byte_size()) ||
(input_buffer_expr->get_output_port_descriptor(0)->get_layout() != output_buffer_expr->get_input_port_descriptor(0)->get_layout()))
continue;

// Also memory can be reused if there are the same ShiftPtrParams (data size, final offsets, ptr increments)
const auto& input_buffer_ports = in.second;
for (const auto& input_buffer_port_idx : input_buffer_ports) {
// Memory can be reused if reading and writing are executed proportionally:
// - the same ShiftPtrParams (data size, final offsets, ptr increments)
// - the same reading/writing order
// - the same buffer memory sizes
const auto input_params =
ShiftPtrParams(data_sizes[input_buffer_port_idx], ptr_increments[input_buffer_port_idx], final_offsets[input_buffer_port_idx]);
const auto output_params =
ShiftPtrParams(data_sizes[output_buffer_port_idx], ptr_increments[output_buffer_port_idx], final_offsets[output_buffer_port_idx]);
if (input_buffer->get_byte_size() == output_buffer->get_byte_size() &&
input_buffer_expr->get_output_port_descriptor(0)->get_layout() == output_buffer_expr->get_input_port_descriptor(0)->get_layout() &&
input_params == output_params) {

// If data pointer shift parameters are unknown on model compilation stage (dynamic),
// we cannot be sure that these data pointers will be proportionally shifted in runtime.
if (input_params.is_static() && output_params.is_static() && input_params == output_params) {
const auto cluster_it = find_cluster_by_expr(input_buffer_expr);
OPENVINO_ASSERT(cluster_it != m_clusters.end(), "Buffer on inputs of Loop must be already saved in clusters");
// Add to the existing cluster
Expand Down Expand Up @@ -157,11 +169,15 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers,

// Decides whether the rewind applied by the inner loop (final offset * data size, negated)
// exactly matches the byte shift applied by the outer loop (ptr increment * data size).
auto can_be_data_ptr_proportionally_shifted = [](int64_t outer_buffer_ptr_increment, int64_t outer_buffer_data_size,
                                                 int64_t inner_buffer_final_offsets, int64_t inner_buffer_data_size) {
    // Shift parameters that are still dynamic at compilation time give no guarantee
    // that the pointers will move proportionally at runtime, so the answer is "no".
    const bool outer_is_dynamic = utils::is_dynamic_value(outer_buffer_ptr_increment);
    if (outer_is_dynamic || utils::is_dynamic_value(inner_buffer_final_offsets))
        return false;
    // An outer loop that does not move the pointer at all never qualifies.
    if (outer_buffer_ptr_increment == 0)
        return false;
    const auto inner_rewind_bytes = inner_buffer_data_size * inner_buffer_final_offsets * -1;
    const auto outer_shift_bytes = outer_buffer_ptr_increment * outer_buffer_data_size;
    return inner_rewind_bytes == outer_shift_bytes;
};

const auto outer_loop_end = ov::as_type_ptr<op::LoopEndStatic>(outer_loop_end_expr_it->get()->get_node());
const auto outer_loop_end = ov::as_type_ptr<op::LoopEnd>(outer_loop_end_expr_it->get()->get_node());
const auto outer_loop_begin = outer_loop_end->get_loop_begin();
const auto& outer_ptr_increments = outer_loop_end->get_ptr_increments();
const auto& outer_data_sizes = outer_loop_end->get_element_type_sizes();
Expand Down Expand Up @@ -218,7 +234,7 @@ int64_t DefineBufferClusters::get_buffer_finalization_offset(const ExpressionPtr
const auto consumers = buffer_out->get_consumers();
for (const auto& consumer : consumers) {
const auto consumer_expr = consumer.get_expr();
const auto loop_end = ov::as_type_ptr<ov::snippets::op::LoopEndStatic>(consumer_expr->get_node());
const auto loop_end = ov::as_type_ptr<ov::snippets::op::LoopEnd>(consumer_expr->get_node());
if (loop_end && consumer_expr->get_loop_ids() == buffer_expr->get_loop_ids()) {
const auto loop_order = ov::snippets::pass::GetTopologicalOrder(loop_end);
if (loop_order > last_loop_exec_order) {
Expand All @@ -243,7 +259,7 @@ bool DefineBufferClusters::unite_nested_clusters(const AllocateBuffers::BufferCl
auto& up_idx = is_outer_up ? outer_idx : inner_idx;
auto& down_idx = is_outer_up ? inner_idx : outer_idx;
if (are_buffer_neighbours(up_buffer, down_buffer, common_loop_end_expr, up_idx, down_idx)) {
const auto common_loop_end = ov::as_type_ptr<op::LoopEndStatic>(common_loop_end_expr->get_node());
const auto common_loop_end = ov::as_type_ptr<op::LoopEnd>(common_loop_end_expr->get_node());
const auto& inner_ptr_increments = common_loop_end->get_ptr_increments();
const auto& inner_final_offsets = common_loop_end->get_finalization_offsets();
const auto& inner_data_sizes = common_loop_end->get_element_type_sizes();
Expand Down Expand Up @@ -289,7 +305,7 @@ bool DefineBufferClusters::are_buffer_neighbours(const ExpressionPtr& up, const
for (const auto& out : up->get_output_port_connectors()) {
for (const auto& buffer_consumer : out->get_consumers()) {
const auto buffer_consumer_expr = buffer_consumer.get_expr();
const auto loop_end = ov::as_type_ptr<op::LoopEndStatic>(buffer_consumer_expr->get_node());
const auto loop_end = ov::as_type_ptr<op::LoopEnd>(buffer_consumer_expr->get_node());
if (!loop_end)
continue;
const auto& loop_inputs = buffer_consumer_expr->get_input_port_connectors();
Expand Down Expand Up @@ -326,7 +342,7 @@ bool DefineBufferClusters::run(lowered::LinearIR& linear_ir, lowered::LinearIR::
for (auto expr_it = begin; expr_it != end; ++expr_it) {
const auto& expr = *expr_it;
const auto op = expr->get_node();
if (ov::is_type<op::LoopEndStatic>(op)) {
if (ov::is_type<op::LoopEnd>(op)) {
parse_loop(expr_it);
continue;
}
Expand Down
Loading

0 comments on commit b0b4201

Please sign in to comment.