Skip to content

Commit

Permalink
[chore](type cast) Fix some implicit cast (apache#43050)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiqiang-hhhh authored Nov 21, 2024
1 parent 2953055 commit bdef601
Show file tree
Hide file tree
Showing 21 changed files with 178 additions and 103 deletions.
5 changes: 4 additions & 1 deletion be/src/vec/data_types/convert_field_to_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <type_traits>
#include <vector>

#include "common/cast_set.h"
#include "common/exception.h"
#include "common/status.h"
#include "util/bitmap_value.h"
Expand All @@ -44,6 +45,7 @@
#include "vec/data_types/data_type_nullable.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"
/** Checks whether a `Field from` of type `From` falls within the range of values of type `To`.
* `From` and `To` - numeric types. They can be floating-point types.
* `From` is one of UInt64, Int64, Float64,
Expand Down Expand Up @@ -257,7 +259,8 @@ void convert_field_to_typeImpl(const Field& src, const IDataType& type,
JsonbWriter writer;
Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); },
src);
*to = JsonbField(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
*to = JsonbField(writer.getOutput()->getBuffer(),
cast_set<UInt32, size_t, false>(writer.getOutput()->getSize()));
return;
} else if (which_type.is_variant_type()) {
if (src.get_type() == Field::Types::VariantMap) {
Expand Down
4 changes: 3 additions & 1 deletion be/src/vec/data_types/data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ReadBuffer;
} // namespace doris

namespace doris::vectorized {
#include "common/compile_check_begin.h"

IDataType::IDataType() = default;

Expand All @@ -58,7 +59,8 @@ void IDataType::update_avg_value_size_hint(const IColumn& column, double& avg_va
/// Update the average value size hint if amount of read rows isn't too small
size_t row_size = column.size();
if (row_size > 10) {
double current_avg_value_size = static_cast<double>(column.byte_size()) / row_size;
double current_avg_value_size =
static_cast<double>(column.byte_size()) / static_cast<double>(row_size);

/// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly.
if (current_avg_value_size > avg_value_size_hint) {
Expand Down
12 changes: 9 additions & 3 deletions be/src/vec/data_types/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <boost/core/noncopyable.hpp>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

#include "common/exception.h"
Expand All @@ -44,7 +45,7 @@ class PColumnMeta;
enum PGenericType_TypeId : int;

namespace vectorized {

#include "common/compile_check_begin.h"
class IDataType;
class IColumn;
class BufferWritable;
Expand All @@ -58,8 +59,11 @@ class Field;
using DataTypePtr = std::shared_ptr<const IDataType>;
using DataTypes = std::vector<DataTypePtr>;
constexpr auto SERIALIZED_MEM_SIZE_LIMIT = 256;
inline size_t upper_int32(size_t size) {
return size_t((3 + size) / 4.0);

/// Returns `size / 4` rounded up, i.e. the number of 4-unit groups needed to
/// cover `size` units (presumably bytes -> 32-bit words, given the name).
///
/// Only integer arithmetic is used: quotient and remainder are combined instead
/// of evaluating `(3 + size) / 4.0` in floating point. That keeps the result
/// exact for values above 2^53 (where a double can no longer represent every
/// integer) and avoids the wrap-around of `3 + size` near the maximum of T.
///
/// @tparam T unsigned integer type of the count (enforced at compile time).
/// @param size the count to round up.
/// @return ceil(size / 4) as T.
template <typename T>
T upper_int32(T size) {
    static_assert(std::is_unsigned_v<T>);
    constexpr T group_size = 4;
    // One extra group is needed whenever `size` is not an exact multiple of 4.
    const T partial_group = (size % group_size != 0) ? T {1} : T {0};
    return static_cast<T>(size / group_size + partial_group);
}

/** Properties of data type.
Expand Down Expand Up @@ -421,4 +425,6 @@ char* serialize_const_flag_and_row_num(const IColumn** column, char* buf,
const char* deserialize_const_flag_and_row_num(const char* buf, MutableColumnPtr* column,
size_t* real_have_saved_num);
} // namespace vectorized

#include "common/compile_check_end.h"
} // namespace doris
4 changes: 2 additions & 2 deletions be/src/vec/data_types/data_type_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "vec/io/io_helper.h"

namespace doris::vectorized {

#include "common/compile_check_begin.h"
// binary: const flag| row num | real saved num | size array | bitmap array
// <size array>: bitmap1 size | bitmap2 size | ...
// <bitmap array>: bitmap1 | bitmap2 | ...
Expand Down Expand Up @@ -159,7 +159,7 @@ MutableColumnPtr DataTypeBitMap::create_column() const {
void DataTypeBitMap::serialize_as_stream(const BitmapValue& cvalue, BufferWritable& buf) {
auto& value = const_cast<BitmapValue&>(cvalue);
std::string memory_buffer;
int bytesize = value.getSizeInBytes();
size_t bytesize = value.getSizeInBytes();
memory_buffer.resize(bytesize);
value.write_to(const_cast<char*>(memory_buffer.data()));
write_string_binary(memory_buffer, buf);
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/data_types/data_type_date.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "vec/data_types/serde/data_type_date64_serde.h"

namespace doris {
#include "common/compile_check_begin.h"
namespace vectorized {
class BufferWritable;
class ReadBuffer;
Expand Down Expand Up @@ -92,5 +93,5 @@ class DataTypeDate final : public DataTypeNumberBase<Int64> {
return std::make_shared<DataTypeDate64SerDe>(nesting_level);
}
};

#include "common/compile_check_end.h"
} // namespace doris::vectorized
2 changes: 2 additions & 0 deletions be/src/vec/data_types/data_type_date_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class DataTypeDateV2;
} // namespace doris

namespace doris::vectorized {
#include "common/compile_check_begin.h"

/** DateTime stores time as unix timestamp.
* The value itself is independent of time zone.
Expand Down Expand Up @@ -143,4 +144,5 @@ constexpr bool IsTimeType = IsDateTimeType<DataType> || IsDateType<DataType>;
/// True when either `IsDateV2Type<DataType>` or `IsDateTimeV2Type<DataType>` is true.
template <typename DataType>
constexpr bool IsTimeV2Type = IsDateV2Type<DataType> || IsDateTimeV2Type<DataType>;

#include "common/compile_check_end.h"
} // namespace doris::vectorized
26 changes: 17 additions & 9 deletions be/src/vec/data_types/data_type_decimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
#include <streamvbyte.h>
#include <sys/types.h>

#include <cstddef>
#include <cstdint>
#include <cstring>

#include "agent/be_exec_version_manager.h"
#include "common/cast_set.h"
#include "runtime/decimalv2_value.h"
#include "util/string_parser.hpp"
#include "vec/columns/column.h"
Expand All @@ -38,11 +41,12 @@
#include "vec/common/string_buffer.hpp"
#include "vec/common/typeid_cast.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/io/io_helper.h"
#include "vec/io/reader_buffer.h"

namespace doris::vectorized {

#include "common/compile_check_begin.h"
template <typename T>
std::string DataTypeDecimal<T>::do_get_name() const {
std::stringstream ss;
Expand Down Expand Up @@ -124,7 +128,9 @@ void DataTypeDecimal<T>::to_string_batch_impl(const ColumnPtr& column_ptr,
auto str = value.to_string(get_format_scale());
chars.insert(str.begin(), str.end());
}
offsets[row_num] = chars.size();

// This cast runs once per row, so use static_cast rather than cast_set for performance reasons.
offsets[row_num] = static_cast<UInt32>(chars.size());
}
}

Expand Down Expand Up @@ -157,20 +163,22 @@ int64_t DataTypeDecimal<T>::get_uncompressed_serialized_bytes(const IColumn& col
if (be_exec_version >= USE_CONST_SERDE) {
auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t);
auto real_need_copy_num = is_column_const(column) ? 1 : column.size();
auto mem_size = sizeof(T) * real_need_copy_num;
auto mem_size = cast_set<UInt32>(sizeof(T) * real_need_copy_num);
if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
return size + mem_size;
} else {
return size + sizeof(size_t) +
std::max(mem_size, streamvbyte_max_compressedbytes(upper_int32(mem_size)));
std::max(cast_set<size_t>(mem_size),
streamvbyte_max_compressedbytes(upper_int32(mem_size)));
}
} else {
auto size = sizeof(T) * column.size();
if (size <= SERIALIZED_MEM_SIZE_LIMIT) {
return sizeof(uint32_t) + size;
} else {
return sizeof(uint32_t) + sizeof(size_t) +
std::max(size, streamvbyte_max_compressedbytes(upper_int32(size)));
std::max(size,
streamvbyte_max_compressedbytes(cast_set<UInt32>(upper_int32(size))));
}
}
}
Expand All @@ -183,7 +191,7 @@ char* DataTypeDecimal<T>::serialize(const IColumn& column, char* buf, int be_exe
buf = serialize_const_flag_and_row_num(&data_column, buf, &real_need_copy_num);

// mem_size = real_need_copy_num * sizeof(T)
const uint32_t mem_size = real_need_copy_num * sizeof(T);
UInt32 mem_size = cast_set<UInt32>(real_need_copy_num * sizeof(T));
const auto* origin_data =
assert_cast<const ColumnDecimal<T>&>(*data_column).get_data().data();

Expand All @@ -201,7 +209,7 @@ char* DataTypeDecimal<T>::serialize(const IColumn& column, char* buf, int be_exe
}
} else {
// row num
const auto mem_size = column.size() * sizeof(T);
UInt32 mem_size = cast_set<UInt32>(column.size() * sizeof(T));
*reinterpret_cast<uint32_t*>(buf) = mem_size;
buf += sizeof(uint32_t);
// column data
Expand Down Expand Up @@ -230,7 +238,7 @@ const char* DataTypeDecimal<T>::deserialize(const char* buf, MutableColumnPtr* c
buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num);

// column data
auto mem_size = real_have_saved_num * sizeof(T);
UInt32 mem_size = cast_set<UInt32>(real_have_saved_num * sizeof(T));
auto& container = assert_cast<ColumnDecimal<T>*>(origin_column)->get_data();
container.resize(real_have_saved_num);
if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
Expand Down Expand Up @@ -289,7 +297,7 @@ template <typename T>
bool DataTypeDecimal<T>::parse_from_string(const std::string& str, T* res) const {
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
res->value = StringParser::string_to_decimal<DataTypeDecimalSerDe<T>::get_primitive_type()>(
str.c_str(), str.size(), precision, scale, &result);
str.c_str(), cast_set<Int32>(str.size()), precision, scale, &result);
return result == StringParser::PARSE_SUCCESS || result == StringParser::PARSE_UNDERFLOW;
}

Expand Down
14 changes: 7 additions & 7 deletions be/src/vec/data_types/data_type_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
#include "vec/data_types/data_type_time_v2.h"

namespace doris::vectorized {

#include "common/compile_check_begin.h"
DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) {
return create_data_type(col_desc.get_desc(), col_desc.is_nullable());
}
Expand All @@ -76,7 +76,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool
DataTypePtr nested = nullptr;
if (col_desc.type() == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
DataTypes dataTypes;
for (size_t i = 0; i < col_desc.get_subtype_count(); i++) {
for (UInt32 i = 0; i < col_desc.get_subtype_count(); i++) {
dataTypes.push_back(create_data_type(col_desc.get_sub_column(i)));
}
nested = std::make_shared<vectorized::DataTypeAggState>(
Expand All @@ -97,7 +97,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool
Strings names;
dataTypes.reserve(col_size);
names.reserve(col_size);
for (size_t i = 0; i < col_size; i++) {
for (UInt32 i = 0; i < col_size; i++) {
dataTypes.push_back(create_data_type(col_desc.get_sub_column(i)));
names.push_back(col_desc.get_sub_column(i).name());
}
Expand Down Expand Up @@ -546,13 +546,13 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
create_data_type(pcolumn.children(1)));
break;
case PGenericType::STRUCT: {
size_t col_size = pcolumn.children_size();
int col_size = pcolumn.children_size();
DCHECK(col_size >= 1);
DataTypes dataTypes;
Strings names;
dataTypes.reserve(col_size);
names.reserve(col_size);
for (size_t i = 0; i < col_size; i++) {
for (int i = 0; i < col_size; i++) {
dataTypes.push_back(create_data_type(pcolumn.children(i)));
names.push_back(pcolumn.children(i).name());
}
Expand Down Expand Up @@ -615,10 +615,10 @@ DataTypePtr DataTypeFactory::create_data_type(const segment_v2::ColumnMetaPB& pc
create_data_type(pcolumn.children_columns(1)));
} else if (pcolumn.type() == static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRUCT)) {
DCHECK_GE(pcolumn.children_columns().size(), 1);
size_t col_size = pcolumn.children_columns().size();
Int32 col_size = pcolumn.children_columns().size();
DataTypes dataTypes(col_size);
Strings names(col_size);
for (size_t i = 0; i < col_size; i++) {
for (Int32 i = 0; i < col_size; i++) {
dataTypes[i] = create_data_type(pcolumn.children_columns(i));
}
nested = std::make_shared<DataTypeStruct>(dataTypes, names);
Expand Down
5 changes: 4 additions & 1 deletion be/src/vec/data_types/data_type_fixed_length_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
#include <ostream>

#include "agent/be_exec_version_manager.h"
#include "common/cast_set.h"
#include "vec/columns/column.h"
#include "vec/common/assert_cast.h"
#include "vec/core/types.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
int be_exec_version) const {
Expand Down Expand Up @@ -62,7 +65,7 @@ char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
return buf;
} else {
// row num
const auto row_num = column.size();
const UInt32 row_num = cast_set<UInt32>(column.size());
*reinterpret_cast<uint32_t*>(buf) = row_num;
buf += sizeof(uint32_t);
// column data
Expand Down
7 changes: 5 additions & 2 deletions be/src/vec/data_types/data_type_jsonb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
#include <typeinfo>
#include <utility>

#include "common/cast_set.h"
#include "util/jsonb_utils.h"
#include "vec/columns/column_const.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_buffer.hpp"
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
#include "vec/io/reader_buffer.h"

namespace doris {
Expand All @@ -34,7 +36,7 @@ class IColumn;
} // namespace doris

namespace doris::vectorized {

#include "common/compile_check_begin.h"
std::string DataTypeJsonb::to_string(const IColumn& column, size_t row_num) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
Expand All @@ -59,7 +61,8 @@ void DataTypeJsonb::to_string(const class doris::vectorized::IColumn& column, si

Status DataTypeJsonb::from_string(ReadBuffer& rb, IColumn* column) const {
JsonBinaryValue value;
RETURN_IF_ERROR(value.from_json_string(rb.position(), rb.count()));
// Throws an exception if rb.count() is larger than INT32_MAX
RETURN_IF_ERROR(value.from_json_string(rb.position(), cast_set<Int32>(rb.count())));

auto* column_string = static_cast<ColumnString*>(column);
column_string->insert_data(value.value(), value.size());
Expand Down
9 changes: 7 additions & 2 deletions be/src/vec/data_types/data_type_jsonb.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <memory>
#include <string>

#include "common/cast_set.h"
#include "common/status.h"
#include "runtime/define_primitive_type.h"
#include "runtime/jsonb_value.h"
Expand All @@ -45,6 +46,7 @@ class ReadBuffer;
} // namespace doris

namespace doris::vectorized {
#include "common/compile_check_begin.h"
class DataTypeJsonb final : public IDataType {
public:
using ColumnType = ColumnString;
Expand All @@ -70,8 +72,9 @@ class DataTypeJsonb final : public IDataType {

virtual Field get_default() const override {
std::string default_json = "null";
JsonBinaryValue binary_val(default_json.c_str(), default_json.size());
return JsonbField(binary_val.value(), binary_val.size());
JsonBinaryValue binary_val(default_json.c_str(), static_cast<Int32>(default_json.size()));
// Throws an exception if binary_val.size() is larger than INT32_MAX
return JsonbField(binary_val.value(), cast_set<Int32>(binary_val.size()));
}

Field get_field(const TExprNode& node) const override {
Expand Down Expand Up @@ -100,4 +103,6 @@ class DataTypeJsonb final : public IDataType {
private:
DataTypeString data_type_string;
};

#include "common/compile_check_end.h"
} // namespace doris::vectorized
Loading

0 comments on commit bdef601

Please sign in to comment.