From 3156fcf58f204a2d61313234d8596b7e1c36a98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Mon, 4 Mar 2024 23:47:43 +0800 Subject: [PATCH] replace resize/reserve with resize_exact/reserve_exact (#4824) --- .../Functions/SparkFunctionMakeDecimal.cpp | 2 +- .../SparkFunctionRegexpExtractAll.cpp | 26 +++++++++---------- .../SparkFunctionReinterpretAsString.cpp | 2 +- .../Functions/SparkFunctionRoundHalfUp.h | 2 +- .../Functions/SparkFunctionTrim.cpp | 6 ++--- .../local-engine/Functions/SparkParseURL.cpp | 14 +++++----- .../IO/AggregateSerializationUtils.cpp | 6 ++--- .../Storages/IO/CompressedWriteBuffer.cpp | 2 +- .../local-engine/Storages/IO/NativeReader.cpp | 4 +-- .../tests/benchmark_local_engine.cpp | 12 ++++----- 10 files changed, 38 insertions(+), 38 deletions(-) diff --git a/cpp-ch/local-engine/Functions/SparkFunctionMakeDecimal.cpp b/cpp-ch/local-engine/Functions/SparkFunctionMakeDecimal.cpp index cf75345db492..231856b0288f 100644 --- a/cpp-ch/local-engine/Functions/SparkFunctionMakeDecimal.cpp +++ b/cpp-ch/local-engine/Functions/SparkFunctionMakeDecimal.cpp @@ -148,7 +148,7 @@ namespace const auto & vector = typeid_cast *>(arguments[0].column.get()); auto & vec_to = col_to->getData(); auto & datas = vector->getData(); - vec_to.resize(input_rows_count); + vec_to.resize_exact(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { diff --git a/cpp-ch/local-engine/Functions/SparkFunctionRegexpExtractAll.cpp b/cpp-ch/local-engine/Functions/SparkFunctionRegexpExtractAll.cpp index ba6eafc61d22..e64a75a5af65 100644 --- a/cpp-ch/local-engine/Functions/SparkFunctionRegexpExtractAll.cpp +++ b/cpp-ch/local-engine/Functions/SparkFunctionRegexpExtractAll.cpp @@ -169,12 +169,12 @@ namespace const auto & match = matches[match_index]; if (match.offset != std::string::npos) { - res_strings_chars.resize(res_strings_offset + match.length + 1); + res_strings_chars.resize_exact(res_strings_offset + match.length + 1); 
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[res_strings_offset], pos + match.offset, match.length); res_strings_offset += match.length; } else - res_strings_chars.resize(res_strings_offset + 1); + res_strings_chars.resize_exact(res_strings_offset + 1); /// Update offsets of Column:String res_strings_chars[res_strings_offset] = 0; @@ -211,9 +211,9 @@ namespace OptimizedRegularExpression::MatchVec matches; matches.reserve(index + 1); - res_offsets.reserve(offsets.size()); - res_strings_chars.reserve(data.size() / 3); - res_strings_offsets.reserve(offsets.size() * 2); + res_offsets.reserve_exact(offsets.size()); + res_strings_chars.reserve_exact(data.size() / 3); + res_strings_offsets.reserve_exact(offsets.size() * 2); size_t res_offset = 0; size_t res_strings_offset = 0; @@ -253,9 +253,9 @@ namespace OptimizedRegularExpression::MatchVec matches; matches.reserve(capture + 1); - res_offsets.reserve(offsets.size()); - res_strings_chars.reserve(data.size() / 3); - res_strings_offsets.reserve(offsets.size() * 2); + res_offsets.reserve_exact(offsets.size()); + res_strings_chars.reserve_exact(data.size() / 3); + res_strings_offsets.reserve_exact(offsets.size() * 2); size_t res_offset = 0; size_t res_strings_offset = 0; @@ -333,9 +333,9 @@ namespace } size_t rows = column_index->size(); - res_offsets.reserve(rows); - res_strings_chars.reserve(rows * str.size() / 3); - res_strings_offsets.reserve(rows * 2); + res_offsets.reserve_exact(rows); + res_strings_chars.reserve_exact(rows * str.size() / 3); + res_strings_offsets.reserve_exact(rows * 2); size_t res_offset = 0; size_t res_strings_offset = 0; @@ -356,12 +356,12 @@ namespace /// Append matched segment into res_strings_chars if (match.offset != std::string::npos) { - res_strings_chars.resize(res_strings_offset + match.length + 1); + res_strings_chars.resize_exact(res_strings_offset + match.length + 1); memcpySmallAllowReadWriteOverflow15(&res_strings_chars[res_strings_offset], start + match.offset, 
match.length); res_strings_offset += match.length; } else - res_strings_chars.resize(res_strings_offset + 1); + res_strings_chars.resize_exact(res_strings_offset + 1); /// Update offsets of Column:String res_strings_chars[res_strings_offset] = 0; diff --git a/cpp-ch/local-engine/Functions/SparkFunctionReinterpretAsString.cpp b/cpp-ch/local-engine/Functions/SparkFunctionReinterpretAsString.cpp index be2a3dc0b74a..5c3e5b6d4449 100644 --- a/cpp-ch/local-engine/Functions/SparkFunctionReinterpretAsString.cpp +++ b/cpp-ch/local-engine/Functions/SparkFunctionReinterpretAsString.cpp @@ -77,7 +77,7 @@ namespace size_t rows = src.size(); ColumnString::Chars & data_to = dst_concrete->getChars(); ColumnString::Offsets & offsets_to = dst_concrete->getOffsets(); - offsets_to.resize(rows); + offsets_to.resize_exact(rows); ColumnString::Offset offset = 0; for (size_t i = 0; i < rows; ++i) diff --git a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h index 01d628c5c63a..ab4faf23575e 100644 --- a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h +++ b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h @@ -134,7 +134,7 @@ struct DispatcherRoundingHalfUp auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(col->getData().size()); + vec_res.resize_exact(col->getData().size()); if (!vec_res.empty()) { diff --git a/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp b/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp index 81f6808c0596..d8f6be1bfc32 100644 --- a/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp +++ b/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp @@ -135,10 +135,10 @@ namespace ColumnString::Offsets & res_offsets, const String & trim_str) const { - res_data.reserve(data.size()); + res_data.reserve_exact(data.size()); size_t rows = offsets.size(); - res_offsets.resize(rows); + res_offsets.resize_exact(rows); size_t 
prev_offset = 0; size_t res_offset = 0; @@ -149,7 +149,7 @@ namespace for (size_t i = 0; i < rows; ++i) { trim(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length, trim_set); - res_data.resize(res_data.size() + length + 1); + res_data.resize_exact(res_data.size() + length + 1); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length); res_offset += length + 1; res_data[res_offset - 1] = '\0'; diff --git a/cpp-ch/local-engine/Functions/SparkParseURL.cpp b/cpp-ch/local-engine/Functions/SparkParseURL.cpp index c39c8eaa90f8..1e570d587c94 100644 --- a/cpp-ch/local-engine/Functions/SparkParseURL.cpp +++ b/cpp-ch/local-engine/Functions/SparkParseURL.cpp @@ -45,8 +45,8 @@ struct ExtractNullableSubstringImpl DB::ColumnString::Chars & res_data, DB::ColumnString::Offsets & res_offsets, DB::IColumn & null_map) { size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(size * Extractor::getReserveLengthForElement()); + res_offsets.resize_exact(size); + res_data.reserve_exact(size * Extractor::getReserveLengthForElement()); null_map.reserve(size); size_t prev_offset = 0; @@ -60,7 +60,7 @@ struct ExtractNullableSubstringImpl { Extractor::execute(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); - res_data.resize(res_data.size() + length + 1); + res_data.resize_exact(res_data.size() + length + 1); if (start) { memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length); @@ -261,8 +261,8 @@ struct SparkExtractURLOneQuery DB::ColumnString::Chars & res_data, DB::ColumnString::Offsets & res_offsets, DB::IColumn & null_map) { const static String protocol_delim = "://"; - res_data.reserve(data.size() / 5); - res_offsets.resize(offsets.size()); + res_data.reserve_exact(data.size() / 5); + res_offsets.resize_exact(offsets.size()); pattern += '='; const char * param_str = pattern.c_str(); @@ -332,7 +332,7 @@ struct SparkExtractURLOneQuery size_t param_size = param_end - 
param_begin; - res_data.resize(res_offset + param_size + 1); + res_data.resize_exact(res_offset + param_size + 1); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], param_begin, param_size); res_offset += param_size; null_map.insert(0); @@ -340,7 +340,7 @@ struct SparkExtractURLOneQuery else { /// No parameter found, put empty string in result. - res_data.resize(res_offset + 1); + res_data.resize_exact(res_offset + 1); null_map.insert(1); } diff --git a/cpp-ch/local-engine/Storages/IO/AggregateSerializationUtils.cpp b/cpp-ch/local-engine/Storages/IO/AggregateSerializationUtils.cpp index 6d663afc4fb3..558f570e2c51 100644 --- a/cpp-ch/local-engine/Storages/IO/AggregateSerializationUtils.cpp +++ b/cpp-ch/local-engine/Storages/IO/AggregateSerializationUtils.cpp @@ -62,7 +62,7 @@ DB::ColumnWithTypeAndName convertAggregateStateToFixedString(const DB::ColumnWit auto res_type = std::make_shared(state_size); auto res_col = res_type->createColumn(); PaddedPODArray & column_chars_t = assert_cast(*res_col).getChars(); - column_chars_t.reserve(aggregate_col->size() * state_size); + column_chars_t.reserve_exact(aggregate_col->size() * state_size); for (const auto & item : aggregate_col->getData()) { column_chars_t.insert_assume_reserved(item, item + state_size); @@ -82,7 +82,7 @@ DB::ColumnWithTypeAndName convertAggregateStateToString(const DB::ColumnWithType PaddedPODArray & column_chars = assert_cast(*res_col).getChars(); IColumn::Offsets & column_offsets = assert_cast(*res_col).getOffsets(); auto value_writer = WriteBufferFromVector>(column_chars); - column_offsets.reserve(aggregate_col->size()); + column_offsets.reserve_exact(aggregate_col->size()); for (const auto & item : aggregate_col->getData()) { aggregate_col->getAggregateFunction()->serialize(item, value_writer); @@ -100,7 +100,7 @@ DB::ColumnWithTypeAndName convertFixedStringToAggregateState(const DB::ColumnWit ColumnAggregateFunction & real_column = typeid_cast(*res_col); auto & arena = 
real_column.createOrGetArena(); ColumnAggregateFunction::Container & vec = real_column.getData(); - vec.reserve(col.column->size()); + vec.reserve_exact(col.column->size()); auto agg_function = agg_type->getFunction(); size_t size_of_state = agg_function->sizeOfData(); size_t align_of_state = agg_function->alignOfData(); diff --git a/cpp-ch/local-engine/Storages/IO/CompressedWriteBuffer.cpp b/cpp-ch/local-engine/Storages/IO/CompressedWriteBuffer.cpp index f58c62321183..582dece6668d 100644 --- a/cpp-ch/local-engine/Storages/IO/CompressedWriteBuffer.cpp +++ b/cpp-ch/local-engine/Storages/IO/CompressedWriteBuffer.cpp @@ -69,7 +69,7 @@ void CompressedWriteBuffer::nextImpl() } else { - compressed_buffer.resize(compressed_reserve_size); + compressed_buffer.resize_exact(compressed_reserve_size); UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); compress_time += compress_time_watch.elapsedNanoseconds(); CityHash_v1_0_2::uint128 checksum_(0,0); diff --git a/cpp-ch/local-engine/Storages/IO/NativeReader.cpp b/cpp-ch/local-engine/Storages/IO/NativeReader.cpp index 6584ef580b9e..f02a1b72f713 100644 --- a/cpp-ch/local-engine/Storages/IO/NativeReader.cpp +++ b/cpp-ch/local-engine/Storages/IO/NativeReader.cpp @@ -88,7 +88,7 @@ static void readFixedSizeAggregateData(DB::ReadBuffer &in, DB::ColumnPtr & colum auto & arena = real_column.createOrGetArena(); ColumnAggregateFunction::Container & vec = real_column.getData(); size_t initial_size = vec.size(); - vec.reserve(initial_size + rows); + vec.reserve_exact(initial_size + rows); for (size_t i = 0; i < rows; ++i) { AggregateDataPtr place = arena.alignedAlloc(column_parse_util.aggregate_state_size, column_parse_util.aggregate_state_align); @@ -105,7 +105,7 @@ static void readVarSizeAggregateData(DB::ReadBuffer &in, DB::ColumnPtr & column, auto & arena = real_column.createOrGetArena(); ColumnAggregateFunction::Container & vec = real_column.getData(); size_t initial_size = 
vec.size(); - vec.reserve(initial_size + rows); + vec.reserve_exact(initial_size + rows); for (size_t i = 0; i < rows; ++i) { AggregateDataPtr place = arena.alignedAlloc(column_parse_util.aggregate_state_size, column_parse_util.aggregate_state_align); diff --git a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp index 0b64237800ca..a482e4702e90 100644 --- a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp +++ b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp @@ -394,9 +394,9 @@ DB::ContextMutablePtr global_context; PaddedPODArray arr; PaddedPODArray condition; PaddedPODArray res_data; - arr.reserve(n); - condition.reserve(n); - res_data.reserve(n); + arr.reserve_exact(n); + condition.reserve_exact(n); + res_data.reserve_exact(n); for (int i = 0; i < n; i++) { arr.push_back(i); @@ -447,9 +447,9 @@ DB::ContextMutablePtr global_context; PaddedPODArray arr; PaddedPODArray condition; PaddedPODArray res_data; - arr.reserve(n); - condition.reserve(n); - res_data.reserve(n); + arr.reserve_exact(n); + condition.reserve_exact(n); + res_data.reserve_exact(n); for (int i = 0; i < n; i++) { arr.push_back(i);