From 862fa93683fb4486b3c7322ca49832f29f8ef885 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Dec 2023 14:50:00 +0800 Subject: [PATCH] case-insensitive matching for hive udfs --- cpp-ch/local-engine/Parser/FunctionParser.h | 2 +- cpp-ch/local-engine/tests/gtest_parquet_write.cpp | 2 +- .../scala/io/glutenproject/expression/UDFMappings.scala | 4 +++- .../apache/spark/sql/hive/HiveSimpleUDFTransformer.scala | 9 +++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp-ch/local-engine/Parser/FunctionParser.h b/cpp-ch/local-engine/Parser/FunctionParser.h index e40143126580c..0e9eaf07a49ca 100644 --- a/cpp-ch/local-engine/Parser/FunctionParser.h +++ b/cpp-ch/local-engine/Parser/FunctionParser.h @@ -76,7 +76,7 @@ class FunctionParser { return plan_parser->toFunctionNode(action_dag, func_name, args); } - + const DB::ActionsDAG::Node * toFunctionNode(DB::ActionsDAGPtr & action_dag, const String & func_name, const String & result_name, const DB::ActionsDAG::NodeRawConstPtrs & args) const { diff --git a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp index 3a96b92e77256..c2c71ff6eb076 100644 --- a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp +++ b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp @@ -199,7 +199,7 @@ TEST(ParquetWrite, ComplexTypes) ch2arrow.chChunkToArrowTable(arrow_table, input_chunks, header.columns()); /// Convert Arrow Table to CH Block - ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, true); + ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, FormatSettings::DateTimeOverflowBehavior::Ignore); Chunk output_chunk; arrow2ch.arrowTableToCHChunk(output_chunk, arrow_table, arrow_table->num_rows()); diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala b/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala index 8d00d2bbf36d0..4b6adef7c0bd8 100644 --- a/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala +++ b/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala @@ -23,6 +23,8 @@ import org.apache.spark.internal.Logging import org.apache.commons.lang3.StringUtils +import java.util.Locale + import scala.collection.mutable.Map object UDFMappings extends Logging { @@ -41,7 +43,7 @@ object UDFMappings extends Logging { s"will be replaced by value:$value") } - res.put(key, value) + res.put(key.toLowerCase(Locale.ROOT), value) } private def parseStringToMap(input: String, res: Map[String, String]) { diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala b/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala index 505d7319f739f..77100e5999d01 100644 --- a/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala @@ -20,6 +20,8 @@ import io.glutenproject.expression.{ExpressionConverter, ExpressionTransformer, import org.apache.spark.sql.catalyst.expressions._ +import java.util.Locale + object HiveSimpleUDFTransformer { def isHiveSimpleUDF(expr: Expression): Boolean = { expr match { @@ -36,7 +38,8 @@ object HiveSimpleUDFTransformer { } val udf = expr.asInstanceOf[HiveSimpleUDF] - val substraitExprName = UDFMappings.hiveUDFMap.get(udf.name.stripPrefix("default.")) + val substraitExprName = + UDFMappings.hiveUDFMap.get(udf.name.stripPrefix("default.").toLowerCase(Locale.ROOT)) substraitExprName match { case Some(name) => GenericExpressionTransformer( @@ -44,7 +47,9 @@ object HiveSimpleUDFTransformer { udf.children.map(ExpressionConverter.replaceWithExpressionTransformer(_, attributeSeq)), udf) case _ => - throw new UnsupportedOperationException(s"Not supported hive simple udf: $udf.") + throw new UnsupportedOperationException( + s"Not supported hive simple udf:$udf" + + s" name:${udf.name} hiveUDFMap:${UDFMappings.hiveUDFMap}") } } }