Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Yohahaha committed Dec 7, 2023
1 parent 9c5314f commit e16a443
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -675,4 +675,16 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla
}
}
}

test("Verify custom tokenizer") {
// https://github.com/apache/spark/pull/35229 Spark remove parquet column name check after 3.2
if (!spark.version.contains("3.2")) {
withTable("t") {
sql("create table t using parquet as select sum(l_partkey) from lineitem")
runQueryAndCompare("select * from t") {
checkOperatorMatch[FileSourceScanExecTransformer]
}
}
}
}
}
28 changes: 0 additions & 28 deletions cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,27 +50,6 @@ static const std::unordered_set<std::string> kBlackList = {
"arrays_overlap",
"approx_percentile"};

bool validateColNames(const ::substrait::NamedStruct& schema) {
// Keeps the same logic with 'Tokenizer::isUnquotedPathCharacter' at
// https://github.com/oap-project/velox/blob/update/velox/type/Tokenizer.cpp#L154-L157.
auto isUnquotedPathCharacter = [](char c) {
return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' || c == '#' || c == '.' || c == '-' ||
c == '_' || isalnum(c);
};

for (const auto& name : schema.names()) {
for (auto i = 0; i < name.size(); i++) {
auto c = name[i];
if (!isUnquotedPathCharacter(c)) {
std::cout << "native validation failed due to: Illegal column charactor " << c << "in column " << name
<< std::endl;
return false;
}
}
}
return true;
}

} // namespace

bool SubstraitToVeloxPlanValidator::validateInputTypes(
Expand Down Expand Up @@ -1160,13 +1139,6 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::ReadRel& readRel
}
}

if (readRel.has_base_schema()) {
const auto& baseSchema = readRel.base_schema();
if (!validateColNames(baseSchema)) {
logValidateMsg("native validation failed due to: Validation failed for column name contains illegal charactor.");
return false;
}
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ object ConverterUtils extends Logging {
if (caseSensitive) name else name.toLowerCase(Locale.ROOT)
}

def getAttributeName(attr: Attribute): String = {
normalizeColName(attr.name)
}

def getShortAttributeName(attr: Attribute): String = {
val name = normalizeColName(attr.name)
val subIndex = name.indexOf("(")
Expand Down Expand Up @@ -123,7 +127,7 @@ object ConverterUtils extends Logging {
// TODO: This is used only by `BasicScanExecTransformer`,
// perhaps we can remove this in the future and use `withExprId` version consistently.
def collectAttributeNamesWithoutExprId(attributes: Seq[Attribute]): JList[String] = {
collectAttributeNamesDFS(attributes)(genColumnNameWithoutExprId)
collectAttributeNamesDFS(attributes)(getAttributeName)
}

private def collectAttributeNamesDFS(attributes: Seq[Attribute])(
Expand Down

0 comments on commit e16a443

Please sign in to comment.