From 6e7ec66528e5606d9d1d122d66553aa58ed4cdcf Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Mon, 27 May 2024 14:14:33 +0800 Subject: [PATCH] [improve](ub) fix some runtime error of ubsan when downcast (#35343) those code could work well, but it will be report some runtime error under UBSAN, so refactor it to let's ubsan could running happy. --- .../exec/aggregation_sink_operator.cpp | 6 ++--- .../exec/aggregation_source_operator.cpp | 5 ++--- .../exec/streaming_aggregation_operator.cpp | 6 ++--- .../aggregate_functions/aggregate_function.h | 22 +++++++++++-------- .../aggregate_function_avg.h | 4 ++-- .../aggregate_function_bitmap.h | 12 +++++----- .../aggregate_function_bitmap_agg.h | 12 +++++----- .../aggregate_function_collect.h | 10 ++++----- .../aggregate_function_count.h | 8 +++---- .../aggregate_function_map.h | 8 +++---- .../aggregate_function_min_max.h | 4 ++-- .../aggregate_function_sum.h | 4 ++-- .../aggregate_function_uniq_distribute_key.h | 4 ++-- be/src/vec/exec/vaggregation_node.h | 8 +++---- 14 files changed, 55 insertions(+), 58 deletions(-) diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 7d1840f95a4267..d79623ba4c3130 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -303,8 +303,7 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b _places.data(), Base::_parent->template cast() ._offsets_of_aggregate_states[i], - _deserialize_buffer.data(), - (vectorized::ColumnString*)(column.get()), _agg_arena_pool, + _deserialize_buffer.data(), column.get(), _agg_arena_pool, rows); } } else { @@ -348,8 +347,7 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b _places.data(), Base::_parent->template cast() ._offsets_of_aggregate_states[i], - _deserialize_buffer.data(), - (vectorized::ColumnString*)(column.get()), _agg_arena_pool, + _deserialize_buffer.data(), column.get(), _agg_arena_pool, rows); } } else { diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp b/be/src/pipeline/exec/aggregation_source_operator.cpp index 33e095b9612cbb..8545fe42f6829d 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/aggregation_source_operator.cpp @@ -500,8 +500,7 @@ Status AggLocalState::merge_with_serialized_key_helper(vectorized::Block* block) ->function() ->deserialize_and_merge_vec_selected( _places.data(), _shared_state->offsets_of_aggregate_states[i], - _deserialize_buffer.data(), - (vectorized::ColumnString*)(column.get()), + _deserialize_buffer.data(), column.get(), _shared_state->agg_arena_pool.get(), rows); } } else { @@ -532,7 +531,7 @@ Status AggLocalState::merge_with_serialized_key_helper(vectorized::Block* block) SCOPED_TIMER(_deserialize_data_timer); Base::_shared_state->aggregate_evaluators[i]->function()->deserialize_and_merge_vec( _places.data(), _shared_state->offsets_of_aggregate_states[i], - _deserialize_buffer.data(), (vectorized::ColumnString*)(column.get()), + _deserialize_buffer.data(), column.get(), _shared_state->agg_arena_pool.get(), rows); } } diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_operator.cpp index 30ae419d828afc..bea67b3050a24a 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/streaming_aggregation_operator.cpp @@ -261,8 +261,7 @@ Status StreamingAggLocalState::_merge_with_serialized_key_helper(vectorized::Blo _places.data(), Base::_parent->template cast() ._offsets_of_aggregate_states[i], - _deserialize_buffer.data(), (vectorized::ColumnString*)(column.get()), - _agg_arena_pool.get(), rows); + _deserialize_buffer.data(), column.get(), _agg_arena_pool.get(), rows); } } else { RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_batch_add_selected( @@ -299,8 +298,7 @@ Status StreamingAggLocalState::_merge_with_serialized_key_helper(vectorized::Blo _places.data(), Base::_parent->template cast() ._offsets_of_aggregate_states[i], - _deserialize_buffer.data(), (vectorized::ColumnString*)(column.get()), - _agg_arena_pool.get(), rows); + _deserialize_buffer.data(), column.get(), _agg_arena_pool.get(), rows); } } else { RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_batch_add( diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index 44d0db7c9d9f0a..082f27e7318345 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -22,6 +22,8 @@ #include "util/defer_op.h" #include "vec/columns/column_complex.h" +#include "vec/columns/column_string.h" +#include "vec/common/assert_cast.h" #include "vec/common/hash_table/phmap_fwd_decl.h" #include "vec/common/string_buffer.hpp" #include "vec/core/block.h" @@ -144,13 +146,12 @@ class IAggregateFunction { size_t num_rows) const = 0; virtual void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const = 0; virtual void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, - const ColumnString* column, Arena* arena, - const size_t num_rows) const = 0; + AggregateDataPtr rhs, const IColumn* column, + Arena* arena, const size_t num_rows) const = 0; virtual void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, size_t num_rows) const = 0; @@ -375,13 +376,14 @@ class IAggregateFunctionHelper : public IAggregateFunction { } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { const auto size_of_data = assert_cast(this)->size_of_data(); + const auto* column_string = assert_cast(column); for (size_t i = 0; i != num_rows; ++i) { try { auto rhs_place = rhs + size_of_data * i; - VectorBufferReader buffer_reader(column->get_data_at(i)); + VectorBufferReader buffer_reader(column_string->get_data_at(i)); assert_cast(this)->create(rhs_place); assert_cast(this)->deserialize_and_merge( places[i] + offset, rhs_place, buffer_reader, arena); @@ -397,17 +399,19 @@ class IAggregateFunctionHelper : public IAggregateFunction { } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { const auto size_of_data = assert_cast(this)->size_of_data(); + const auto* column_string = assert_cast(column); for (size_t i = 0; i != num_rows; ++i) { try { auto rhs_place = rhs + size_of_data * i; - VectorBufferReader buffer_reader(column->get_data_at(i)); + VectorBufferReader buffer_reader(column_string->get_data_at(i)); assert_cast(this)->create(rhs_place); - if (places[i]) + if (places[i]) { assert_cast(this)->deserialize_and_merge( places[i] + offset, rhs_place, buffer_reader, arena); + } } catch (...) { for (int j = 0; j < i; ++j) { auto place = rhs + size_of_data * j; diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h index 268c2dbf032a8e..6827c6db373667 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h @@ -244,7 +244,7 @@ class AggregateFunctionAvg final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -252,7 +252,7 @@ class AggregateFunctionAvg final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index d3db5257b45a79..dd7af71de06ae0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -222,11 +222,11 @@ class AggregateFunctionBitmapSerializationHelper } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { if (version >= BITMAP_SERDE) { - auto& col = assert_cast(*assert_cast(column)); - auto* data = col.get_data().data(); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { this->data(places[i] + offset).merge(data[i]); } @@ -236,11 +236,11 @@ class AggregateFunctionBitmapSerializationHelper } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { if (version >= BITMAP_SERDE) { - auto& col = assert_cast(*assert_cast(column)); - auto* data = col.get_data().data(); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { this->data(places[i] + offset).merge(data[i]); diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index 000a6dab36bf0e..ce80b38d0913ba 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -185,20 +185,20 @@ class AggregateFunctionBitmapAgg final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - auto& col = assert_cast(*assert_cast(column)); - auto* data = col.get_data().data(); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { this->data(places[i] + offset).value |= data[i]; } } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - auto& col = assert_cast(*assert_cast(column)); - auto* data = col.get_data().data(); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { this->data(places[i] + offset).value |= data[i]; diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index ac555982461754..4da6e023eb3949 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -634,12 +634,11 @@ class AggregateFunctionCollect } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { if constexpr (ShowNull::value) { for (size_t i = 0; i != num_rows; ++i) { - this->data(places[i] + offset) - .deserialize_and_merge(*assert_cast(column), i); + this->data(places[i] + offset).deserialize_and_merge(*column, i); } } else { return BaseHelper::deserialize_and_merge_vec(places, offset, rhs, column, arena, @@ -674,13 +673,12 @@ class AggregateFunctionCollect } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { if constexpr (ShowNull::value) { for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { - this->data(places[i] + offset) - .deserialize_and_merge(*assert_cast(column), i); + this->data(places[i] + offset).deserialize_and_merge(*column, i); } } } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h index bf44b944bda021..7449c949cb9047 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count.h @@ -146,7 +146,7 @@ class AggregateFunctionCount final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -154,7 +154,7 @@ class AggregateFunctionCount final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -284,7 +284,7 @@ class AggregateFunctionCountNotNullUnary final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -292,7 +292,7 @@ class AggregateFunctionCountNotNullUnary final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index d8eb031c5e59bb..0f1a298aed10f3 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -300,9 +300,9 @@ class AggregateFunctionMapAgg final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - auto& col = assert_cast(*assert_cast(column)); + const auto& col = assert_cast(*column); for (size_t i = 0; i != num_rows; ++i) { auto map = doris::vectorized::get(col[i]); this->data(places[i] + offset).add(map[0], map[1]); @@ -310,9 +310,9 @@ class AggregateFunctionMapAgg final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - auto& col = assert_cast(*assert_cast(column)); + const auto& col = assert_cast(*column); for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { auto map = doris::vectorized::get(col[i]); diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.h b/be/src/vec/aggregate_functions/aggregate_function_min_max.h index dfc0cbae7f42fb..0d724e7ee5f45e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.h @@ -624,7 +624,7 @@ class AggregateFunctionsSingleValue final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -632,7 +632,7 @@ class AggregateFunctionsSingleValue final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h index b53d011e5f1268..376b6ece4aafea 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h @@ -183,7 +183,7 @@ class AggregateFunctionSum final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -191,7 +191,7 @@ class AggregateFunctionSum final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h index 0fa66e3423041b..3eaa6418f0b7ca 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h @@ -215,7 +215,7 @@ class AggregateFunctionUniqDistributeKey final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); @@ -223,7 +223,7 @@ class AggregateFunctionUniqDistributeKey final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const ColumnString* column, + AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { this->deserialize_from_column(rhs, *column, arena, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index ec0b4a46fb71e5..70222f9ccf7312 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -637,8 +637,8 @@ class AggregationNode : public ::doris::ExecNode { SCOPED_TIMER(_deserialize_data_timer); _aggregate_evaluators[i]->function()->deserialize_and_merge_vec_selected( _places.data(), _offsets_of_aggregate_states[i], - _deserialize_buffer.data(), (ColumnString*)(column.get()), - _agg_arena_pool.get(), rows); + _deserialize_buffer.data(), column.get(), _agg_arena_pool.get(), + rows); } } else { RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_batch_add_selected( @@ -672,8 +672,8 @@ class AggregationNode : public ::doris::ExecNode { SCOPED_TIMER(_deserialize_data_timer); _aggregate_evaluators[i]->function()->deserialize_and_merge_vec( _places.data(), _offsets_of_aggregate_states[i], - _deserialize_buffer.data(), (ColumnString*)(column.get()), - _agg_arena_pool.get(), rows); + _deserialize_buffer.data(), column.get(), _agg_arena_pool.get(), + rows); } } else { RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_batch_add(