Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize RowContainer get fixed column width #506

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions velox/exec/RowContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,14 @@ RowContainer::RowContainer(
typeKinds_.push_back(type->kind());
types_.push_back(type);
offsets_.push_back(offset);
offset += typeKindSize(type->kind());
const auto typeSize = typeKindSize(type->kind());
offset += typeSize;
nullOffsets_.push_back(nullOffset);
if (type->isFixedWidth()) {
fixedColumnWidth_.push_back(typeSize);
} else {
fixedColumnWidth_.push_back(std::nullopt);
}
isVariableWidth |= !type->isFixedWidth();
if (nullableKeys_) {
++nullOffset;
Expand Down Expand Up @@ -216,6 +222,11 @@ RowContainer::RowContainer(
for (auto& type : dependentTypes) {
types_.push_back(type);
typeKinds_.push_back(type->kind());
if (type->isFixedWidth()) {
fixedColumnWidth_.push_back(typeKindSize(type->kind()));
} else {
fixedColumnWidth_.push_back(std::nullopt);
}
nullOffsets_.push_back(nullOffset);
++nullOffset;
isVariableWidth |= !type->isFixedWidth();
Expand Down Expand Up @@ -617,7 +628,8 @@ int32_t RowContainer::variableSizeAt(const char* row, column_index_t column)
}

// Returns the byte size of the fixed-width column at index 'column'.
//
// NOTE(review): this span comes from a flattened diff view, so it appears to
// hold both the removed and the added lines of the change. Read literally,
// the first return yields typeKindSize(typeKinds_[column]) and the
// VELOX_DCHECK / second return below it are unreachable. Presumably the
// intended post-change body is only the DCHECK followed by the read from
// 'fixedColumnWidth_' -- confirm against the actual file.
int32_t RowContainer::fixedSizeAt(column_index_t column) const {
return typeKindSize(typeKinds_[column]);
VELOX_DCHECK(fixedColumnWidth_[column].has_value());
return fixedColumnWidth_[column].value();
}

int32_t RowContainer::extractVariableSizeAt(
Expand Down Expand Up @@ -707,9 +719,9 @@ void RowContainer::extractSerializedRows(
size_t fixedWidthRowSize = 0;
bool hasVariableWidth = false;
for (auto i = 0; i < types_.size(); ++i) {
const auto& type = types_[i];
if (type->isFixedWidth()) {
fixedWidthRowSize += typeKindSize(type->kind());
const auto width = fixedColumnWidth_[i];
if (width.has_value()) {
fixedWidthRowSize += width.value();
} else {
hasVariableWidth = true;
}
Expand Down Expand Up @@ -746,11 +758,11 @@ void RowContainer::extractSerializedRows(

// Copy values.
for (auto j = 0; j < types_.size(); ++j) {
const auto& type = types_[j];
if (type->isFixedWidth()) {
const auto size = typeKindSize(type->kind());
::memcpy(rawBuffer + offset, row + rowColumns_[j].offset(), size);
offset += size;
const auto width = fixedColumnWidth_[j];
if (width.has_value()) {
::memcpy(
rawBuffer + offset, row + rowColumns_[j].offset(), width.value());
offset += width.value();
} else {
auto size = extractVariableSizeAt(row, j, rawBuffer + offset);
offset += size;
Expand Down Expand Up @@ -778,11 +790,13 @@ void RowContainer::storeSerializedRow(

RowSizeTracker tracker(row[rowSizeOffset_], *stringAllocator_);
for (auto i = 0; i < types_.size(); ++i) {
const auto& type = types_[i];
if (type->isFixedWidth()) {
const auto size = typeKindSize(type->kind());
::memcpy(row + rowColumns_[i].offset(), serialized.data() + offset, size);
offset += size;
const auto width = fixedColumnWidth_[i];
if (width.has_value()) {
::memcpy(
row + rowColumns_[i].offset(),
serialized.data() + offset,
width.value());
offset += width.value();
} else {
const auto size = storeVariableSizeAt(serialized.data() + offset, row, i);
offset += size;
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/RowContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,8 @@ class RowContainer {
// to 'typeKinds_' and 'rowColumns_'.
std::vector<TypePtr> types_;
std::vector<TypeKind> typeKinds_;
// Width of fixed size fields. std::nullopt for variable width fields.
std::vector<std::optional<int32_t>> fixedColumnWidth_;
int32_t nextOffset_ = 0;
// Indicates if this row container has rows with duplicate keys. This only
// applies if 'nextOffset_' is set.
Expand All @@ -1510,10 +1512,12 @@ class RowContainer {
// Offset and null indicator offset of non-aggregate fields as a single word.
// Corresponds pairwise to 'types_'.
std::vector<RowColumn> rowColumns_;

// Optional aggregated column stats (e.g. min/max size) for non-aggregate
// fields. Index aligns with 'rowColumns_'. Column stats will only be enabled
// if 'collectColumnStats_' is true.
std::vector<RowColumn::Stats> rowColumnsStats_;

// Bit offset of the probed flag for a full or right outer join payload. 0 if
// not applicable.
int32_t probedFlagOffset_ = 0;
Expand Down
Loading