From bdef6013a8c8b263443ebe6c00d10a9c6c41da8b Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Thu, 21 Nov 2024 20:47:05 +0800 Subject: [PATCH] [chore](type cast) Fix some implicit cast (#43050) --- .../vec/data_types/convert_field_to_type.cpp | 5 +- be/src/vec/data_types/data_type.cpp | 4 +- be/src/vec/data_types/data_type.h | 12 +++- be/src/vec/data_types/data_type_bitmap.cpp | 4 +- be/src/vec/data_types/data_type_date.h | 3 +- be/src/vec/data_types/data_type_date_time.h | 2 + be/src/vec/data_types/data_type_decimal.cpp | 26 ++++++--- be/src/vec/data_types/data_type_factory.cpp | 14 ++--- .../data_type_fixed_length_object.cpp | 5 +- be/src/vec/data_types/data_type_jsonb.cpp | 7 ++- be/src/vec/data_types/data_type_jsonb.h | 9 ++- be/src/vec/data_types/data_type_nullable.cpp | 26 ++++++--- .../vec/data_types/data_type_number_base.cpp | 33 ++++++----- be/src/vec/data_types/data_type_number_base.h | 8 ++- be/src/vec/data_types/data_type_object.cpp | 15 +++-- .../data_types/data_type_quantilestate.cpp | 4 +- be/src/vec/data_types/data_type_string.cpp | 58 ++++++++++++------- be/src/vec/data_types/data_type_time_v2.cpp | 1 + be/src/vec/data_types/data_type_time_v2.h | 12 ++-- be/src/vec/runtime/vdatetime_value.cpp | 9 +-- be/src/vec/runtime/vdatetime_value.h | 24 ++++---- 21 files changed, 178 insertions(+), 103 deletions(-) diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index ecbce03ba6b10a..c625f8c424bba6 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -30,6 +30,7 @@ #include #include +#include "common/cast_set.h" #include "common/exception.h" #include "common/status.h" #include "util/bitmap_value.h" @@ -44,6 +45,7 @@ #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" /** Checking for a `Field from` of `From` type falls to a range of values of type `To`. * `From` and `To` - numeric types. They can be floating-point types. * `From` is one of UInt64, Int64, Float64, @@ -257,7 +259,8 @@ void convert_field_to_typeImpl(const Field& src, const IDataType& type, JsonbWriter writer; Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, src); - *to = JsonbField(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); + *to = JsonbField(writer.getOutput()->getBuffer(), + cast_set(writer.getOutput()->getSize())); return; } else if (which_type.is_variant_type()) { if (src.get_type() == Field::Types::VariantMap) { diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp index cacbf4b2ecec3f..28415076ba339a 100644 --- a/be/src/vec/data_types/data_type.cpp +++ b/be/src/vec/data_types/data_type.cpp @@ -41,6 +41,7 @@ class ReadBuffer; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" IDataType::IDataType() = default; @@ -58,7 +59,8 @@ void IDataType::update_avg_value_size_hint(const IColumn& column, double& avg_va /// Update the average value size hint if amount of read rows isn't too small size_t row_size = column.size(); if (row_size > 10) { - double current_avg_value_size = static_cast(column.byte_size()) / row_size; + double current_avg_value_size = + static_cast(column.byte_size()) / static_cast(row_size); /// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly. if (current_avg_value_size > avg_value_size_hint) { diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index dcabc423bb8685..8796e2292a5c1b 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "common/exception.h" @@ -44,7 +45,7 @@ class PColumnMeta; enum PGenericType_TypeId : int; namespace vectorized { - +#include "common/compile_check_begin.h" class IDataType; class IColumn; class BufferWritable; @@ -58,8 +59,11 @@ class Field; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; constexpr auto SERIALIZED_MEM_SIZE_LIMIT = 256; -inline size_t upper_int32(size_t size) { - return size_t((3 + size) / 4.0); + +template +T upper_int32(T size) { + static_assert(std::is_unsigned_v); + return T(static_cast(3 + size) / 4.0); } /** Properties of data type. @@ -421,4 +425,6 @@ char* serialize_const_flag_and_row_num(const IColumn** column, char* buf, const char* deserialize_const_flag_and_row_num(const char* buf, MutableColumnPtr* column, size_t* real_have_saved_num); } // namespace vectorized + +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp index 4ab6b3abc47e64..3dc585f0b5efe7 100644 --- a/be/src/vec/data_types/data_type_bitmap.cpp +++ b/be/src/vec/data_types/data_type_bitmap.cpp @@ -30,7 +30,7 @@ #include "vec/io/io_helper.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" // binary: const flag| row num | real saved num | size array | bitmap array // : bitmap1 size | bitmap2 size | ... // : bitmap1 | bitmap2 | ... @@ -159,7 +159,7 @@ MutableColumnPtr DataTypeBitMap::create_column() const { void DataTypeBitMap::serialize_as_stream(const BitmapValue& cvalue, BufferWritable& buf) { auto& value = const_cast(cvalue); std::string memory_buffer; - int bytesize = value.getSizeInBytes(); + size_t bytesize = value.getSizeInBytes(); memory_buffer.resize(bytesize); value.write_to(const_cast(memory_buffer.data())); write_string_binary(memory_buffer, buf); diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h index 0d62230a9b0080..0df23022e2b6d2 100644 --- a/be/src/vec/data_types/data_type_date.h +++ b/be/src/vec/data_types/data_type_date.h @@ -35,6 +35,7 @@ #include "vec/data_types/serde/data_type_date64_serde.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class BufferWritable; class ReadBuffer; @@ -92,5 +93,5 @@ class DataTypeDate final : public DataTypeNumberBase { return std::make_shared(nesting_level); } }; - +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index 99507a370391e1..03a6a85657935c 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -45,6 +45,7 @@ class DataTypeDateV2; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" /** DateTime stores time as unix timestamp. * The value itself is independent of time zone. @@ -143,4 +144,5 @@ constexpr bool IsTimeType = IsDateTimeType || IsDateType; template constexpr bool IsTimeV2Type = IsDateTimeV2Type || IsDateV2Type; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp index 35575106cdbf60..d08c8268036abb 100644 --- a/be/src/vec/data_types/data_type_decimal.cpp +++ b/be/src/vec/data_types/data_type_decimal.cpp @@ -25,9 +25,12 @@ #include #include +#include +#include #include #include "agent/be_exec_version_manager.h" +#include "common/cast_set.h" #include "runtime/decimalv2_value.h" #include "util/string_parser.hpp" #include "vec/columns/column.h" @@ -38,11 +41,12 @@ #include "vec/common/string_buffer.hpp" #include "vec/common/typeid_cast.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" #include "vec/io/io_helper.h" #include "vec/io/reader_buffer.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" template std::string DataTypeDecimal::do_get_name() const { std::stringstream ss; @@ -124,7 +128,9 @@ void DataTypeDecimal::to_string_batch_impl(const ColumnPtr& column_ptr, auto str = value.to_string(get_format_scale()); chars.insert(str.begin(), str.end()); } - offsets[row_num] = chars.size(); + + // cast by row, so not use cast_set for performance issue + offsets[row_num] = static_cast(chars.size()); } } @@ -157,12 +163,13 @@ int64_t DataTypeDecimal::get_uncompressed_serialized_bytes(const IColumn& col if (be_exec_version >= USE_CONST_SERDE) { auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t); auto real_need_copy_num = is_column_const(column) ? 1 : column.size(); - auto mem_size = sizeof(T) * real_need_copy_num; + auto mem_size = cast_set(sizeof(T) * real_need_copy_num); if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { return size + mem_size; } else { return size + sizeof(size_t) + - std::max(mem_size, streamvbyte_max_compressedbytes(upper_int32(mem_size))); + std::max(cast_set(mem_size), + streamvbyte_max_compressedbytes(upper_int32(mem_size))); } } else { auto size = sizeof(T) * column.size(); @@ -170,7 +177,8 @@ int64_t DataTypeDecimal::get_uncompressed_serialized_bytes(const IColumn& col return sizeof(uint32_t) + size; } else { return sizeof(uint32_t) + sizeof(size_t) + - std::max(size, streamvbyte_max_compressedbytes(upper_int32(size))); + std::max(size, + streamvbyte_max_compressedbytes(cast_set(upper_int32(size)))); } } } @@ -183,7 +191,7 @@ char* DataTypeDecimal::serialize(const IColumn& column, char* buf, int be_exe buf = serialize_const_flag_and_row_num(&data_column, buf, &real_need_copy_num); // mem_size = real_need_copy_num * sizeof(T) - const uint32_t mem_size = real_need_copy_num * sizeof(T); + UInt32 mem_size = cast_set(real_need_copy_num * sizeof(T)); const auto* origin_data = assert_cast&>(*data_column).get_data().data(); @@ -201,7 +209,7 @@ char* DataTypeDecimal::serialize(const IColumn& column, char* buf, int be_exe } } else { // row num - const auto mem_size = column.size() * sizeof(T); + UInt32 mem_size = cast_set(column.size() * sizeof(T)); *reinterpret_cast(buf) = mem_size; buf += sizeof(uint32_t); // column data @@ -230,7 +238,7 @@ const char* DataTypeDecimal::deserialize(const char* buf, MutableColumnPtr* c buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num); // column data - auto mem_size = real_have_saved_num * sizeof(T); + UInt32 mem_size = cast_set(real_have_saved_num * sizeof(T)); auto& container = assert_cast*>(origin_column)->get_data(); container.resize(real_have_saved_num); if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { @@ -289,7 +297,7 @@ template bool DataTypeDecimal::parse_from_string(const std::string& str, T* res) const { StringParser::ParseResult result = StringParser::PARSE_SUCCESS; res->value = StringParser::string_to_decimal::get_primitive_type()>( - str.c_str(), str.size(), precision, scale, &result); + str.c_str(), cast_set(str.size()), precision, scale, &result); return result == StringParser::PARSE_SUCCESS || result == StringParser::PARSE_UNDERFLOW; } diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 388710b667a343..369809d77f68f3 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -67,7 +67,7 @@ #include "vec/data_types/data_type_time_v2.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) { return create_data_type(col_desc.get_desc(), col_desc.is_nullable()); } @@ -76,7 +76,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool DataTypePtr nested = nullptr; if (col_desc.type() == FieldType::OLAP_FIELD_TYPE_AGG_STATE) { DataTypes dataTypes; - for (size_t i = 0; i < col_desc.get_subtype_count(); i++) { + for (UInt32 i = 0; i < col_desc.get_subtype_count(); i++) { dataTypes.push_back(create_data_type(col_desc.get_sub_column(i))); } nested = std::make_shared( @@ -97,7 +97,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool Strings names; dataTypes.reserve(col_size); names.reserve(col_size); - for (size_t i = 0; i < col_size; i++) { + for (UInt32 i = 0; i < col_size; i++) { dataTypes.push_back(create_data_type(col_desc.get_sub_column(i))); names.push_back(col_desc.get_sub_column(i).name()); } @@ -546,13 +546,13 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { create_data_type(pcolumn.children(1))); break; case PGenericType::STRUCT: { - size_t col_size = pcolumn.children_size(); + int col_size = pcolumn.children_size(); DCHECK(col_size >= 1); DataTypes dataTypes; Strings names; dataTypes.reserve(col_size); names.reserve(col_size); - for (size_t i = 0; i < col_size; i++) { + for (int i = 0; i < col_size; i++) { dataTypes.push_back(create_data_type(pcolumn.children(i))); names.push_back(pcolumn.children(i).name()); } @@ -615,10 +615,10 @@ DataTypePtr DataTypeFactory::create_data_type(const segment_v2::ColumnMetaPB& pc create_data_type(pcolumn.children_columns(1))); } else if (pcolumn.type() == static_cast(FieldType::OLAP_FIELD_TYPE_STRUCT)) { DCHECK_GE(pcolumn.children_columns().size(), 1); - size_t col_size = pcolumn.children_columns().size(); + Int32 col_size = pcolumn.children_columns().size(); DataTypes dataTypes(col_size); Strings names(col_size); - for (size_t i = 0; i < col_size; i++) { + for (Int32 i = 0; i < col_size; i++) { dataTypes[i] = create_data_type(pcolumn.children_columns(i)); } nested = std::make_shared(dataTypes, names); diff --git a/be/src/vec/data_types/data_type_fixed_length_object.cpp b/be/src/vec/data_types/data_type_fixed_length_object.cpp index 11e56892f064fe..3d8dd001077b30 100644 --- a/be/src/vec/data_types/data_type_fixed_length_object.cpp +++ b/be/src/vec/data_types/data_type_fixed_length_object.cpp @@ -23,10 +23,13 @@ #include #include "agent/be_exec_version_manager.h" +#include "common/cast_set.h" #include "vec/columns/column.h" #include "vec/common/assert_cast.h" +#include "vec/core/types.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf, int be_exec_version) const { @@ -62,7 +65,7 @@ char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf, return buf; } else { // row num - const auto row_num = column.size(); + const UInt32 row_num = cast_set(column.size()); *reinterpret_cast(buf) = row_num; buf += sizeof(uint32_t); // column data diff --git a/be/src/vec/data_types/data_type_jsonb.cpp b/be/src/vec/data_types/data_type_jsonb.cpp index 102f6d4889b932..49ec95b3b44806 100644 --- a/be/src/vec/data_types/data_type_jsonb.cpp +++ b/be/src/vec/data_types/data_type_jsonb.cpp @@ -20,11 +20,13 @@ #include #include +#include "common/cast_set.h" #include "util/jsonb_utils.h" #include "vec/columns/column_const.h" #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" +#include "vec/core/types.h" #include "vec/io/reader_buffer.h" namespace doris { @@ -34,7 +36,7 @@ class IColumn; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" std::string DataTypeJsonb::to_string(const IColumn& column, size_t row_num) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; @@ -59,7 +61,8 @@ void DataTypeJsonb::to_string(const class doris::vectorized::IColumn& column, si Status DataTypeJsonb::from_string(ReadBuffer& rb, IColumn* column) const { JsonBinaryValue value; - RETURN_IF_ERROR(value.from_json_string(rb.position(), rb.count())); + // Throw exception if rb.count is large than INT32_MAX + RETURN_IF_ERROR(value.from_json_string(rb.position(), cast_set(rb.count()))); auto* column_string = static_cast(column); column_string->insert_data(value.value(), value.size()); diff --git a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 3d681e3ce79754..4aec1ccc0417a9 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -24,6 +24,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "runtime/define_primitive_type.h" #include "runtime/jsonb_value.h" @@ -45,6 +46,7 @@ class ReadBuffer; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" class DataTypeJsonb final : public IDataType { public: using ColumnType = ColumnString; @@ -70,8 +72,9 @@ class DataTypeJsonb final : public IDataType { virtual Field get_default() const override { std::string default_json = "null"; - JsonBinaryValue binary_val(default_json.c_str(), default_json.size()); - return JsonbField(binary_val.value(), binary_val.size()); + JsonBinaryValue binary_val(default_json.c_str(), static_cast(default_json.size())); + // Throw exception if default_json.size() is large than INT32_MAX + return JsonbField(binary_val.value(), cast_set(binary_val.size())); } Field get_field(const TExprNode& node) const override { @@ -100,4 +103,6 @@ class DataTypeJsonb final : public IDataType { private: DataTypeString data_type_string; }; + +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 66cbcb25a9b6bc..9f155babfaac3e 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -30,6 +30,7 @@ #include #include "agent/be_exec_version_manager.h" +#include "common/cast_set.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" @@ -37,12 +38,13 @@ #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" #include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nothing.h" #include "vec/io/reader_buffer.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_) : nested_data_type {nested_data_type_} { if (!nested_data_type) { @@ -114,8 +116,10 @@ int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& colum if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { size += mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX size = size + sizeof(size_t) + - std::max(mem_size, streamvbyte_max_compressedbytes(upper_int32(mem_size))); + std::max(mem_size, streamvbyte_max_compressedbytes( + cast_set(upper_int32(mem_size)))); } const auto& col = assert_cast(*data_column); size = size + nested_data_type->get_uncompressed_serialized_bytes(col.get_nested_column(), @@ -126,8 +130,10 @@ int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& colum if (size_t size = sizeof(bool) * column.size(); size <= SERIALIZED_MEM_SIZE_LIMIT) { ret += size + sizeof(uint32_t); } else { + // Throw exception if mem_size is large than UINT32_MAX ret += (sizeof(uint32_t) + sizeof(size_t) + - std::max(size, streamvbyte_max_compressedbytes(upper_int32(size)))); + std::max(size, + streamvbyte_max_compressedbytes(cast_set(upper_int32(size))))); } ret += nested_data_type->get_uncompressed_serialized_bytes( assert_cast(*column.convert_to_full_column_if_const()) @@ -151,9 +157,10 @@ char* DataTypeNullable::serialize(const IColumn& column, char* buf, int be_exec_ memcpy(buf, col.get_null_map_data().data(), mem_size); buf += mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX auto encode_size = streamvbyte_encode( reinterpret_cast(col.get_null_map_data().data()), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + cast_set(upper_int32(mem_size)), (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); } @@ -165,16 +172,17 @@ char* DataTypeNullable::serialize(const IColumn& column, char* buf, int be_exec_ // row num auto mem_size = col.size() * sizeof(bool); - *reinterpret_cast(buf) = mem_size; + *reinterpret_cast(buf) = static_cast(mem_size); buf += sizeof(uint32_t); // null flags if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { memcpy(buf, col.get_null_map_data().data(), mem_size); buf += mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX auto encode_size = streamvbyte_encode( reinterpret_cast(col.get_null_map_data().data()), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + cast_set(upper_int32(mem_size)), (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); } @@ -200,8 +208,9 @@ const char* DataTypeNullable::deserialize(const char* buf, MutableColumnPtr* col } else { size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); + // Throw exception if mem_size is large than UINT32_MAX streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(col->get_null_map_data().data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); buf += encode_size; } // column data values @@ -221,8 +230,9 @@ const char* DataTypeNullable::deserialize(const char* buf, MutableColumnPtr* col } else { size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); + // Throw exception if mem_size is large than UINT32_MAX streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(col->get_null_map_data().data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); buf += encode_size; } // data values diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp index 7d5c831144b971..da34d82031cda0 100644 --- a/be/src/vec/data_types/data_type_number_base.cpp +++ b/be/src/vec/data_types/data_type_number_base.cpp @@ -30,6 +30,7 @@ #include #include "agent/be_exec_version_manager.h" +#include "common/cast_set.h" #include "gutil/strings/numbers.h" #include "runtime/large_int_value.h" #include "util/mysql_global.h" @@ -39,11 +40,12 @@ #include "vec/columns/column_vector.h" #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" +#include "vec/core/types.h" #include "vec/io/io_helper.h" #include "vec/io/reader_buffer.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" template void DataTypeNumberBase::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { @@ -197,16 +199,20 @@ int64_t DataTypeNumberBase::get_uncompressed_serialized_bytes(const IColumn& if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { return size + mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX return size + sizeof(size_t) + - std::max(mem_size, streamvbyte_max_compressedbytes(upper_int32(mem_size))); + std::max(mem_size, streamvbyte_max_compressedbytes( + cast_set(upper_int32(mem_size)))); } } else { auto size = sizeof(T) * column.size(); if (size <= SERIALIZED_MEM_SIZE_LIMIT) { return sizeof(uint32_t) + size; } else { + // Throw exception if mem_size is large than UINT32_MAX return sizeof(uint32_t) + sizeof(size_t) + - std::max(size, streamvbyte_max_compressedbytes(upper_int32(size))); + std::max(size, + streamvbyte_max_compressedbytes(cast_set(upper_int32(size)))); } } } @@ -229,9 +235,10 @@ char* DataTypeNumberBase::serialize(const IColumn& column, char* buf, memcpy(buf, origin_data, mem_size); return buf + mem_size; } else { - auto encode_size = - streamvbyte_encode(reinterpret_cast(origin_data), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + // Throw exception if mem_size is large than UINT32_MAX + auto encode_size = streamvbyte_encode(reinterpret_cast(origin_data), + cast_set(upper_int32(mem_size)), + (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += sizeof(size_t); return buf + encode_size; @@ -239,7 +246,7 @@ char* DataTypeNumberBase::serialize(const IColumn& column, char* buf, } else { // row num const auto mem_size = column.size() * sizeof(T); - *reinterpret_cast(buf) = mem_size; + *reinterpret_cast(buf) = static_cast(mem_size); buf += sizeof(uint32_t); // column data auto ptr = column.convert_to_full_column_if_const(); @@ -248,10 +255,10 @@ char* DataTypeNumberBase::serialize(const IColumn& column, char* buf, memcpy(buf, origin_data, mem_size); return buf + mem_size; } - - auto encode_size = - streamvbyte_encode(reinterpret_cast(origin_data), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + // Throw exception if mem_size is large than UINT32_MAX + auto encode_size = streamvbyte_encode(reinterpret_cast(origin_data), + cast_set(upper_int32(mem_size)), + (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += sizeof(size_t); return buf + encode_size; @@ -277,7 +284,7 @@ const char* DataTypeNumberBase::deserialize(const char* buf, MutableColumnPtr size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(container.data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); buf = buf + encode_size; } return buf; @@ -296,7 +303,7 @@ const char* DataTypeNumberBase::deserialize(const char* buf, MutableColumnPtr size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(container.data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); return buf + encode_size; } } diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index 0468ed80c8de03..a376b458f5133b 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ b/be/src/vec/data_types/data_type_number_base.h @@ -30,6 +30,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "runtime/define_primitive_type.h" #include "serde/data_type_number_serde.h" @@ -51,7 +52,7 @@ struct TypeId; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" /** Implements part of the IDataType interface, common to all numbers and for Date and DateTime. */ template @@ -188,12 +189,13 @@ class DataTypeNumberBase : public IDataType { for (int row_num = 0; row_num < size; row_num++) { auto num = is_const ? col_vec.get_element(0) : col_vec.get_element(row_num); static_cast(this)->push_number(chars, num); - offsets[row_num] = chars.size(); + // push_number can check the chars is over uint32 so use static_cast here. + offsets[row_num] = static_cast(chars.size()); } } private: bool _is_null_literal = false; }; - +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index 6dde7df44d5ba1..0c795e542b0dd5 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -33,6 +33,7 @@ #include "vec/columns/column_object.h" #include "vec/common/assert_cast.h" #include "vec/common/typeid_cast.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/json/path_in_data.h" @@ -44,7 +45,7 @@ class IColumn; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" DataTypeObject::DataTypeObject(const String& schema_format_, bool is_nullable_) : schema_format(to_lower(schema_format_)), is_nullable(is_nullable_) {} bool DataTypeObject::equals(const IDataType& rhs) const { @@ -115,7 +116,8 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve type->to_pb_column_meta(&column_meta_pb); std::string meta_binary; column_meta_pb.SerializeToString(&meta_binary); - *reinterpret_cast(buf) = meta_binary.size(); + // Safe cast + *reinterpret_cast(buf) = static_cast(meta_binary.size()); buf += sizeof(uint32_t); memcpy(buf, meta_binary.data(), meta_binary.size()); buf += meta_binary.size(); @@ -124,10 +126,11 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve buf = type->serialize(entry->data.get_finalized_column(), buf, be_exec_version); } // serialize num of subcolumns - *reinterpret_cast(size_pos) = num_of_columns; + // Safe case + *reinterpret_cast(size_pos) = static_cast(num_of_columns); // serialize num of rows, only take effect when subcolumns empty if (be_exec_version >= VARIANT_SERDE) { - *reinterpret_cast(buf) = column_object.rows(); + *reinterpret_cast(buf) = static_cast(column_object.rows()); buf += sizeof(uint32_t); } @@ -183,13 +186,13 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum std::string DataTypeObject::to_string(const IColumn& column, size_t row_num) const { const auto& variant = assert_cast(column); std::string res; - static_cast(variant.serialize_one_row_to_string(row_num, &res)); + static_cast(variant.serialize_one_row_to_string(cast_set(row_num), &res)); return res; } void DataTypeObject::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { const auto& variant = assert_cast(column); - static_cast(variant.serialize_one_row_to_string(row_num, ostr)); + static_cast(variant.serialize_one_row_to_string(cast_set(row_num), ostr)); } } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_quantilestate.cpp b/be/src/vec/data_types/data_type_quantilestate.cpp index 32cc60698012f5..567cbd58804031 100644 --- a/be/src/vec/data_types/data_type_quantilestate.cpp +++ b/be/src/vec/data_types/data_type_quantilestate.cpp @@ -27,6 +27,7 @@ #include "vec/io/io_helper.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" // binary: const flag | row num | read saved num | | // : quantilestate1 size | quantilestate2 size | ... // : quantilestate1 | quantilestate2 | ... @@ -158,8 +159,7 @@ MutableColumnPtr DataTypeQuantileState::create_column() const { void DataTypeQuantileState::serialize_as_stream(const QuantileState& cvalue, BufferWritable& buf) { auto& value = const_cast(cvalue); std::string memory_buffer; - int bytesize = value.get_serialized_size(); - memory_buffer.resize(bytesize); + memory_buffer.resize(value.get_serialized_size()); value.serialize(const_cast(reinterpret_cast(memory_buffer.data()))); write_string_binary(memory_buffer, buf); } diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index 424cd43bd3ab57..c01f2c1f24912a 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -24,9 +24,11 @@ #include #include +#include #include #include "agent/be_exec_version_manager.h" +#include "common/cast_set.h" #include "common/exception.h" #include "common/status.h" #include "vec/columns/column.h" @@ -36,10 +38,11 @@ #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/io/reader_buffer.h" namespace doris::vectorized { - +#include "common/compile_check_begin.h" std::string DataTypeString::to_string(const IColumn& column, size_t row_num) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; @@ -97,8 +100,10 @@ int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column, if (offsets_size <= SERIALIZED_MEM_SIZE_LIMIT) { size += offsets_size; } else { - size += sizeof(size_t) + std::max(offsets_size, streamvbyte_max_compressedbytes( - upper_int32(offsets_size))); + // Throw exception if offsets_size is large than UINT32_MAX + size += sizeof(size_t) + + std::max(offsets_size, streamvbyte_max_compressedbytes( + cast_set(upper_int32(offsets_size)))); } size += sizeof(size_t); if (size_t bytes = data_column.get_chars().size(); bytes <= SERIALIZED_MEM_SIZE_LIMIT) { @@ -110,7 +115,8 @@ int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column, "LZ4_MAX_INPUT_SIZE={}", bytes, LZ4_MAX_INPUT_SIZE); } - size += sizeof(size_t) + std::max(bytes, (size_t)LZ4_compressBound(bytes)); + size += sizeof(size_t) + + std::max(bytes, (size_t)LZ4_compressBound(cast_set(bytes))); } return size; } else { @@ -121,14 +127,18 @@ int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column, offsets_size <= SERIALIZED_MEM_SIZE_LIMIT) { size += offsets_size; } else { - size += sizeof(size_t) + std::max(offsets_size, streamvbyte_max_compressedbytes( - upper_int32(offsets_size))); + // Throw exception if offsets_size is large than UINT32_MAX + size += sizeof(size_t) + + std::max(offsets_size, streamvbyte_max_compressedbytes( + cast_set(upper_int32(offsets_size)))); } if (auto bytes = data_column.get_chars().size(); bytes <= SERIALIZED_MEM_SIZE_LIMIT) { size += bytes; } else { - size += sizeof(size_t) + std::max(bytes, (size_t)LZ4_compressBound(bytes)); + // Throw exception if bytes is large than UINT32_MAX + size += sizeof(size_t) + + std::max(bytes, (size_t)LZ4_compressBound(cast_set(bytes))); } return size; } @@ -141,16 +151,17 @@ char* DataTypeString::serialize(const IColumn& column, char* buf, int be_exec_ve buf = serialize_const_flag_and_row_num(&data_column, buf, &real_need_copy_num); // mem_size = real_row_num * sizeof(IColumn::Offset) - auto mem_size = real_need_copy_num * sizeof(IColumn::Offset); + size_t mem_size = real_need_copy_num * sizeof(IColumn::Offset); const auto& string_column = assert_cast(*data_column); // offsets if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { memcpy(buf, string_column.get_offsets().data(), mem_size); buf += mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX auto encode_size = streamvbyte_encode( reinterpret_cast(string_column.get_offsets().data()), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + cast_set(upper_int32(mem_size)), (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); } @@ -163,9 +174,9 @@ char* DataTypeString::serialize(const IColumn& column, char* buf, int be_exec_ve memcpy(buf, string_column.get_chars().data(), value_len); buf += value_len; } else { - auto encode_size = - LZ4_compress_fast(string_column.get_chars().raw_data(), (buf + sizeof(size_t)), - value_len, LZ4_compressBound(value_len), 1); + auto encode_size = LZ4_compress_fast(string_column.get_chars().raw_data(), + (buf + sizeof(size_t)), cast_set(value_len), + LZ4_compressBound(cast_set(value_len)), 1); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); } @@ -175,17 +186,18 @@ char* DataTypeString::serialize(const IColumn& column, char* buf, int be_exec_ve const auto& data_column = assert_cast(*ptr.get()); // row num - uint32_t mem_size = data_column.size() * sizeof(IColumn::Offset); - *reinterpret_cast(buf) = mem_size; + size_t mem_size = data_column.size() * sizeof(IColumn::Offset); + *reinterpret_cast(buf) = static_cast(mem_size); buf += sizeof(uint32_t); // offsets if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { memcpy(buf, data_column.get_offsets().data(), mem_size); buf += mem_size; } else { + // Throw exception if mem_size is large than UINT32_MAX auto encode_size = streamvbyte_encode( reinterpret_cast(data_column.get_offsets().data()), - upper_int32(mem_size), (uint8_t*)(buf + sizeof(size_t))); + cast_set(upper_int32(mem_size)), (uint8_t*)(buf + sizeof(size_t))); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); } @@ -199,9 +211,9 @@ char* DataTypeString::serialize(const IColumn& column, char* buf, int be_exec_ve buf += value_len; return buf; } - auto encode_size = - LZ4_compress_fast(data_column.get_chars().raw_data(), (buf + sizeof(size_t)), - value_len, LZ4_compressBound(value_len), 1); + auto encode_size = LZ4_compress_fast(data_column.get_chars().raw_data(), + (buf + sizeof(size_t)), cast_set(value_len), + LZ4_compressBound(cast_set(value_len)), 1); *reinterpret_cast(buf) = encode_size; buf += (sizeof(size_t) + encode_size); return buf; @@ -229,7 +241,7 @@ const char* DataTypeString::deserialize(const char* buf, MutableColumnPtr* colum size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(offsets.data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); buf += encode_size; } @@ -245,7 +257,8 @@ const char* DataTypeString::deserialize(const char* buf, MutableColumnPtr* colum } else { size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); - LZ4_decompress_safe(buf, reinterpret_cast(data.data()), encode_size, value_len); + LZ4_decompress_safe(buf, reinterpret_cast(data.data()), + cast_set(encode_size), cast_set(value_len)); buf += encode_size; } return buf; @@ -265,7 +278,7 @@ const char* DataTypeString::deserialize(const char* buf, MutableColumnPtr* colum size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(offsets.data()), - upper_int32(mem_size)); + cast_set(upper_int32(mem_size))); buf += encode_size; } // total length @@ -280,7 +293,8 @@ const char* DataTypeString::deserialize(const char* buf, MutableColumnPtr* colum } else { size_t encode_size = *reinterpret_cast(buf); buf += sizeof(size_t); - LZ4_decompress_safe(buf, reinterpret_cast(data.data()), encode_size, value_len); + LZ4_decompress_safe(buf, reinterpret_cast(data.data()), + cast_set(encode_size), cast_set(value_len)); buf += encode_size; } return buf; diff --git a/be/src/vec/data_types/data_type_time_v2.cpp b/be/src/vec/data_types/data_type_time_v2.cpp index 53560fac4bab2d..604518616fabf5 100644 --- a/be/src/vec/data_types/data_type_time_v2.cpp +++ b/be/src/vec/data_types/data_type_time_v2.cpp @@ -37,6 +37,7 @@ #include "vec/runtime/vdatetime_value.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class IColumn; } // namespace vectorized diff --git a/be/src/vec/data_types/data_type_time_v2.h b/be/src/vec/data_types/data_type_time_v2.h index 7688a04a9a86f7..e9f3fd383658ac 100644 --- a/be/src/vec/data_types/data_type_time_v2.h +++ b/be/src/vec/data_types/data_type_time_v2.h @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -49,7 +50,7 @@ class IColumn; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" /** * Use UInt32 as underlying type to represent DateV2 type. * Specifically, a dateV2 type is represented as (YYYY (23 bits), MM (4 bits), dd (5 bits)). @@ -73,7 +74,8 @@ class DataTypeDateV2 final : public DataTypeNumberBase { Field get_field(const TExprNode& node) const override { DateV2Value value; - if (value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size())) { + if (value.from_date_str(node.date_literal.value.c_str(), + cast_set(node.date_literal.value.size()))) { return value.to_date_int_val(); } else { throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, @@ -150,8 +152,8 @@ class DataTypeDateTimeV2 final : public DataTypeNumberBase { DateV2Value value; const int32_t scale = node.type.types.empty() ? -1 : node.type.types.front().scalar_type.scale; - if (value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size(), - scale)) { + if (value.from_date_str(node.date_literal.value.c_str(), + cast_set(node.date_literal.value.size()), scale)) { return value.to_date_int_val(); } else { throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, @@ -181,5 +183,5 @@ template constexpr bool IsDataTypeDateTimeV2 = false; template <> inline constexpr bool IsDataTypeDateTimeV2 = true; - +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 797fee1a50db15..f7e72efa0899ea 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -92,11 +93,11 @@ bool VecDateTimeValue::check_date(uint32_t year, uint32_t month, uint32_t day) { // The interval format is that with no delimiters // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format // 0 1 2 3 4 5 6 7 -bool VecDateTimeValue::from_date_str(const char* date_str, int len) { +bool VecDateTimeValue::from_date_str(const char* date_str, size_t len) { return from_date_str_base(date_str, len, nullptr); } //parse timezone to get offset -bool VecDateTimeValue::from_date_str(const char* date_str, int len, +bool VecDateTimeValue::from_date_str(const char* date_str, size_t len, const cctz::time_zone& local_time_zone) { return from_date_str_base(date_str, len, &local_time_zone); } @@ -3412,7 +3413,7 @@ const char* DateV2Value::day_name() const { template void DateV2Value::unchecked_set_time(uint16_t year, uint8_t month, uint8_t day, uint8_t hour, - uint8_t minute, uint8_t second, uint32_t microsecond) { + uint8_t minute, uint16_t second, uint32_t microsecond) { date_v2_value_.year_ = year; date_v2_value_.month_ = month; date_v2_value_.day_ = day; @@ -3425,7 +3426,7 @@ void DateV2Value::unchecked_set_time(uint16_t year, uint8_t month, uint8_t da } template -void DateV2Value::unchecked_set_time(uint8_t hour, uint8_t minute, uint8_t second, +void DateV2Value::unchecked_set_time(uint8_t hour, uint8_t minute, uint16_t second, uint32_t microsecond) { if constexpr (is_datetime) { date_v2_value_.hour_ = hour; diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 9bb8916710299f..2cf7636347851a 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -32,6 +32,7 @@ #include #include +#include "gutil/integral_types.h" #include "util/hash_util.hpp" #include "util/time_lut.h" #include "util/timezone_utils.h" @@ -371,8 +372,8 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS' // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS' // 'YYYYMMDDTHHMMSS' - bool from_date_str(const char* str, int len); - bool from_date_str(const char* str, int len, const cctz::time_zone& local_time_zone); + bool from_date_str(const char* str, size_t len); + bool from_date_str(const char* str, size_t len, const cctz::time_zone& local_time_zone); // Construct Date/Datetime type value from int64_t value. // Return true if convert success. Otherwise return false. @@ -428,15 +429,16 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes int64_t daynr() const { return calc_daynr(_year, _month, _day); } - int year() const { return _year; } - int month() const { return _month; } + uint16_t year() const { return _year; } + uint8_t month() const { return _month; } int quarter() const { return (_month - 1) / 3 + 1; } int week() const { return week(mysql_week_mode(0)); } //00-53 - int day() const { return _day; } - int hour() const { return _hour; } - int minute() const { return _minute; } - int second() const { return _second; } - int neg() const { return _neg; } + uint8_t day() const { return _day; } + uint8_t hour() const { return _hour; } + uint8_t minute() const { return _minute; } + uint16_t second() const { return _second; } + uint16_t neg() const { return _neg; } + int64_t time_part_to_seconds() const { return _hour * SECOND_PER_HOUR + _minute * SECOND_PER_MINUTE + _second; } @@ -888,9 +890,9 @@ class DateV2Value { } void unchecked_set_time(uint16_t year, uint8_t month, uint8_t day, uint8_t hour, uint8_t minute, - uint8_t second, uint32_t microsecond = 0); + uint16_t second, uint32_t microsecond = 0); - void unchecked_set_time(uint8_t hour, uint8_t minute, uint8_t second, uint32_t microsecond); + void unchecked_set_time(uint8_t hour, uint8_t minute, uint16_t second, uint32_t microsecond); int64_t daynr() const { return calc_daynr(date_v2_value_.year_, date_v2_value_.month_, date_v2_value_.day_);