-
Notifications
You must be signed in to change notification settings - Fork 448
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
93b76aa
commit 0372389
Showing
9 changed files
with
345 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include "NativeReader.h" | ||
|
||
#include <IO/ReadHelpers.h> | ||
#include <IO/VarInt.h> | ||
#include <DataTypes/DataTypeFactory.h> | ||
#include <Columns/ColumnAggregateFunction.h> | ||
#include <Common/Arena.h> | ||
|
||
/// Error codes defined elsewhere in the DB namespace; only declared here so
/// this translation unit can name them when throwing.
namespace DB::ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int INCORRECT_DATA;
extern const int INCORRECT_INDEX;
extern const int LOGICAL_ERROR;
extern const int TOO_LARGE_ARRAY_SIZE;
}
|
||
using namespace DB; | ||
|
||
namespace local_engine | ||
{ | ||
void NativeReader::readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) | ||
{ | ||
ISerialization::DeserializeBinaryBulkSettings settings; | ||
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; | ||
settings.avg_value_size_hint = avg_value_size_hint; | ||
settings.position_independent_encoding = false; | ||
settings.native_format = true; | ||
|
||
ISerialization::DeserializeBinaryBulkStatePtr state; | ||
|
||
serialization.deserializeBinaryBulkStatePrefix(settings, state); | ||
serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); | ||
|
||
if (column->size() != rows) | ||
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, | ||
"Cannot read all data in NativeReader. Rows read: {}. Rows expected: {}", column->size(), rows); | ||
} | ||
|
||
/// Reads `rows` aggregate-function states from `istr` and appends them to `column`.
///
/// `column` is treated as a ColumnAggregateFunction (typeid_cast). For every row
/// a state of `sizeOfData()` bytes, aligned to `alignOfData()`, is allocated in the
/// column's arena, created via the aggregate function, and then overwritten with
/// the same number of raw bytes taken from `istr`.
///
/// Throws Exception(CANNOT_READ_ALL_DATA) if the buffer yields fewer bytes than a
/// full state. The check is a real runtime check (not an assertion) so that a
/// truncated stream is reported in every build type instead of leaving a partially
/// filled state in the column.
void NativeReader::readAggData(const DB::DataTypeAggregateFunction & data_type, DB::ColumnPtr & column, DB::ReadBuffer & istr, size_t rows)
{
    ColumnAggregateFunction & real_column = typeid_cast<ColumnAggregateFunction &>(*column->assumeMutable());
    auto & arena = real_column.createOrGetArena();
    ColumnAggregateFunction::Container & vec = real_column.getData();

    /// Account for states that may already be stored in the column.
    vec.reserve(vec.size() + rows);

    auto agg_function = data_type.getFunction();
    const size_t size_of_state = agg_function->sizeOfData();
    const size_t align_of_state = agg_function->alignOfData();

    for (size_t i = 0; i < rows; ++i)
    {
        AggregateDataPtr place = arena.alignedAlloc(size_of_state, align_of_state);
        agg_function->create(place);

        /// The serialized form is the in-memory image of the state.
        const size_t bytes_read = istr.read(place, size_of_state);
        if (bytes_read != size_of_state)
            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
                "Cannot read all data in NativeReader. Bytes read: {}. Bytes expected: {}", bytes_read, size_of_state);

        vec.push_back(place);
    }
}
|
||
|
||
/// Returns a copy of the `header` member.
Block NativeReader::getHeader() const
{
    return this->header;
}
|
||
/// Reads one block from `istr`.
///
/// Layout consumed, in order: column count (VarUInt), row count (VarUInt), then
/// for each column its type name (binary string) followed by the column data.
/// Columns are named "col_<index>". Aggregate-function columns are read with
/// readAggData(), all others with readData() and the type's default serialization.
///
/// Returns an empty block when the buffer is already at EOF.
/// Throws TOO_LARGE_ARRAY_SIZE for implausible dimensions, INCORRECT_DATA for
/// "zero columns but some rows" (when `header` is empty), and LOGICAL_ERROR if
/// the assembled block's row count differs from the announced one.
Block NativeReader::read()
{
    Block result;

    const DataTypeFactory & type_factory = DataTypeFactory::instance();

    if (istr.eof())
        return result;

    /// Dimensions
    size_t num_columns = 0;
    size_t num_rows = 0;
    readVarUInt(num_columns, istr);
    readVarUInt(num_rows, istr);

    /// Sanity limits on the announced dimensions.
    if (num_columns > 1'000'000uz)
        throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Suspiciously many columns in Native format: {}", num_columns);
    if (num_rows > 1'000'000'000'000uz)
        throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Suspiciously many rows in Native format: {}", num_rows);

    if (num_rows != 0 && num_columns == 0 && !header)
        throw Exception(ErrorCodes::INCORRECT_DATA, "Zero columns but {} rows in Native format.", num_rows);

    for (size_t pos = 0; pos < num_columns; ++pos)
    {
        ColumnWithTypeAndName entry;
        entry.name = "col_" + std::to_string(pos);

        /// Type
        String type_name;
        readBinary(type_name, istr);
        entry.type = type_factory.get(type_name);
        const bool holds_agg_state = isAggregateFunction(entry.type);
        SerializationPtr serialization = entry.type->getDefaultSerialization();

        /// Data
        ColumnPtr data = entry.type->createColumn(*serialization);

        double size_hint = 0;
        if (!avg_value_size_hints.empty())
            size_hint = avg_value_size_hints[pos];

        /// With zero rows there are no data bytes to consume for this column.
        if (num_rows != 0)
        {
            if (!holds_agg_state)
            {
                readData(*serialization, data, istr, num_rows, size_hint);
            }
            else
            {
                const DataTypeAggregateFunction * agg_type = checkAndGetDataType<DataTypeAggregateFunction>(entry.type.get());
                readAggData(*agg_type, data, istr, num_rows);
            }
        }

        entry.column = std::move(data);
        result.insert(std::move(entry));
    }

    if (result.rows() != num_rows)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Row count mismatch after deserialization, got: {}, expected: {}", result.rows(), num_rows);

    return result;
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <Common/PODArray.h> | ||
#include <Core/Block.h> | ||
#include <DataTypes/DataTypeAggregateFunction.h> | ||
|
||
namespace local_engine | ||
{ | ||
|
||
class NativeReader | ||
{ | ||
public: | ||
NativeReader(DB::ReadBuffer & istr_) : istr(istr_) {} | ||
|
||
static void readData(const DB::ISerialization & serialization, DB::ColumnPtr & column, DB::ReadBuffer & istr, size_t rows, double avg_value_size_hint); | ||
static void readAggData(const DB::DataTypeAggregateFunction & data_type, DB::ColumnPtr & column, DB::ReadBuffer & istr, size_t rows); | ||
|
||
DB::Block getHeader() const; | ||
|
||
DB::Block read(); | ||
|
||
private: | ||
DB::ReadBuffer & istr; | ||
DB::Block header; | ||
|
||
DB::PODArray<double> avg_value_size_hints; | ||
|
||
void updateAvgValueSizeHints(const DB::Block & block); | ||
}; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include "NativeWriter.h" | ||
#include <IO/WriteBuffer.h> | ||
#include <IO/WriteHelpers.h> | ||
#include <DataTypes/Serializations/ISerialization.h> | ||
#include <Columns/ColumnSparse.h> | ||
|
||
using namespace DB; | ||
|
||
namespace local_engine | ||
{ | ||
void NativeWriter::flush() | ||
{ | ||
ostr.next(); | ||
} | ||
|
||
static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) | ||
{ | ||
/** If there are columns-constants - then we materialize them. | ||
* (Since the data type does not know how to serialize / deserialize constants.) | ||
*/ | ||
ColumnPtr full_column = column->convertToFullColumnIfConst(); | ||
|
||
ISerialization::SerializeBinaryBulkSettings settings; | ||
settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; | ||
settings.position_independent_encoding = false; | ||
settings.low_cardinality_max_dictionary_size = 0; | ||
|
||
ISerialization::SerializeBinaryBulkStatePtr state; | ||
serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state); | ||
serialization.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); | ||
serialization.serializeBinaryBulkStateSuffix(settings, state); | ||
} | ||
|
||
/// Writes `block` to the output buffer and returns the number of bytes written.
///
/// Layout produced, in order: column count (VarUInt), row count (VarUInt), then
/// for each column the type name taken from `header` at the same position
/// followed by the column data. The data itself is serialized with the default
/// serialization of the block column's own type.
size_t NativeWriter::write(const DB::Block & block)
{
    const size_t bytes_before = ostr.count();

    block.checkNumberOfRows();

    /// Dimensions
    const size_t num_columns = block.columns();
    const size_t num_rows = block.rows();
    writeVarUInt(num_columns, ostr);
    writeVarUInt(num_rows, ostr);

    for (size_t pos = 0; pos < num_columns; ++pos)
    {
        auto entry = block.safeGetByPosition(pos);

        /// Type: agg state will convert to fixedString, so the actual agg state
        /// type name is taken from the header rather than from the block column.
        const String type_name = header.safeGetByPosition(pos).type->getName();
        writeStringBinary(type_name, ostr);

        SerializationPtr serialization = entry.type->getDefaultSerialization();
        entry.column = recursiveRemoveSparse(entry.column);

        /// Data: zero items of data is always represented as zero number of bytes.
        /// NOTE(review): offset 0 / limit 0 presumably selects the whole column — confirm against ISerialization.
        if (num_rows != 0)
            writeData(*serialization, entry.column, ostr, 0, 0);
    }

    return ostr.count() - bytes_before;
}
} |
Oops, something went wrong.